From 919c14ef132986aa1514b2070ce6d19b5579a6ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89anna=20=C3=93=20Cath=C3=A1in?= Date: Mon, 14 Sep 2020 17:36:49 +0100 Subject: MLECO-929 Add Object Detection sample application using the public ArmNN C++ API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I14aa1b4b726212cffbefd6687203f93f936fa872 Signed-off-by: Éanna Ó Catháin --- samples/CMakeLists.txt | 1 + samples/ObjectDetection/CMakeLists.txt | 66 +++ samples/ObjectDetection/Readme.md | 453 +++++++++++++++++++++ .../ObjectDetection/cmake/aarch64-toolchain.cmake | 20 + .../cmake/arm-linux-gnueabihf-toolchain.cmake | 20 + samples/ObjectDetection/cmake/find_armnn.cmake | 35 ++ samples/ObjectDetection/cmake/find_catch.cmake | 16 + samples/ObjectDetection/cmake/find_opencv.cmake | 204 ++++++++++ samples/ObjectDetection/cmake/unit_tests.cmake | 65 +++ .../include/ArmnnNetworkExecutor.hpp | 80 ++++ samples/ObjectDetection/include/BoundingBox.hpp | 108 +++++ samples/ObjectDetection/include/CmdArgsParser.hpp | 50 +++ .../ObjectDetection/include/CvVideoFileWriter.hpp | 61 +++ .../ObjectDetection/include/CvVideoFrameReader.hpp | 108 +++++ samples/ObjectDetection/include/CvWindowOutput.hpp | 53 +++ samples/ObjectDetection/include/DetectedObject.hpp | 96 +++++ .../include/IDetectionResultDecoder.hpp | 39 ++ samples/ObjectDetection/include/IFrameOutput.hpp | 48 +++ samples/ObjectDetection/include/IFrameReader.hpp | 45 ++ samples/ObjectDetection/include/ImageUtils.hpp | 58 +++ .../ObjectDetection/include/NetworkPipeline.hpp | 148 +++++++ .../ObjectDetection/include/NonMaxSuppression.hpp | 28 ++ .../ObjectDetection/include/SSDResultDecoder.hpp | 32 ++ samples/ObjectDetection/include/Types.hpp | 50 +++ .../ObjectDetection/include/YoloResultDecoder.hpp | 43 ++ .../ObjectDetection/src/ArmnnNetworkExecutor.cpp | 140 +++++++ samples/ObjectDetection/src/BoundingBox.cpp | 116 ++++++ samples/ObjectDetection/src/CmdArgsParser.cpp | 70 ++++ samples/ObjectDetection/src/CvVideoFileWriter.cpp | 38 ++ samples/ObjectDetection/src/CvVideoFrameReader.cpp | 98 +++++ samples/ObjectDetection/src/CvWindowOutput.cpp | 33 ++ samples/ObjectDetection/src/DetectedObject.cpp | 65 +++ samples/ObjectDetection/src/ImageUtils.cpp | 126 ++++++ samples/ObjectDetection/src/Main.cpp | 160 ++++++++ samples/ObjectDetection/src/NetworkPipeline.cpp | 102 +++++ samples/ObjectDetection/src/NonMaxSuppression.cpp | 92 +++++ samples/ObjectDetection/src/SSDResultDecoder.cpp | 80 ++++ samples/ObjectDetection/src/YoloResultDecoder.cpp | 100 +++++ samples/ObjectDetection/test/BoundingBoxTests.cpp | 177 ++++++++ samples/ObjectDetection/test/FrameReaderTest.cpp | 103 +++++ samples/ObjectDetection/test/ImageUtilsTest.cpp | 128 ++++++ samples/ObjectDetection/test/NMSTests.cpp | 90 ++++ samples/ObjectDetection/test/PipelineTest.cpp | 60 +++ 43 files changed, 3705 insertions(+) create mode 100644 samples/ObjectDetection/CMakeLists.txt create mode 100644 samples/ObjectDetection/Readme.md create mode 100644 samples/ObjectDetection/cmake/aarch64-toolchain.cmake create mode 100644 samples/ObjectDetection/cmake/arm-linux-gnueabihf-toolchain.cmake create mode 100644 samples/ObjectDetection/cmake/find_armnn.cmake create mode 100644 samples/ObjectDetection/cmake/find_catch.cmake create mode 100644 samples/ObjectDetection/cmake/find_opencv.cmake create mode 100644 samples/ObjectDetection/cmake/unit_tests.cmake create mode 100644 samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp create mode 100644 
samples/ObjectDetection/include/BoundingBox.hpp
create mode 100644 samples/ObjectDetection/include/CmdArgsParser.hpp
create mode 100644 samples/ObjectDetection/include/CvVideoFileWriter.hpp
create mode 100644 samples/ObjectDetection/include/CvVideoFrameReader.hpp
create mode 100644 samples/ObjectDetection/include/CvWindowOutput.hpp
create mode 100644 samples/ObjectDetection/include/DetectedObject.hpp
create mode 100644 samples/ObjectDetection/include/IDetectionResultDecoder.hpp
create mode 100644 samples/ObjectDetection/include/IFrameOutput.hpp
create mode 100644 samples/ObjectDetection/include/IFrameReader.hpp
create mode 100644 samples/ObjectDetection/include/ImageUtils.hpp
create mode 100644 samples/ObjectDetection/include/NetworkPipeline.hpp
create mode 100644 samples/ObjectDetection/include/NonMaxSuppression.hpp
create mode 100644 samples/ObjectDetection/include/SSDResultDecoder.hpp
create mode 100644 samples/ObjectDetection/include/Types.hpp
create mode 100644 samples/ObjectDetection/include/YoloResultDecoder.hpp
create mode 100644 samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
create mode 100644 samples/ObjectDetection/src/BoundingBox.cpp
create mode 100644 samples/ObjectDetection/src/CmdArgsParser.cpp
create mode 100644 samples/ObjectDetection/src/CvVideoFileWriter.cpp
create mode 100644 samples/ObjectDetection/src/CvVideoFrameReader.cpp
create mode 100644 samples/ObjectDetection/src/CvWindowOutput.cpp
create mode 100644 samples/ObjectDetection/src/DetectedObject.cpp
create mode 100644 samples/ObjectDetection/src/ImageUtils.cpp
create mode 100644 samples/ObjectDetection/src/Main.cpp
create mode 100644 samples/ObjectDetection/src/NetworkPipeline.cpp
create mode 100644 samples/ObjectDetection/src/NonMaxSuppression.cpp
create mode 100644 samples/ObjectDetection/src/SSDResultDecoder.cpp
create mode 100644 samples/ObjectDetection/src/YoloResultDecoder.cpp
create mode 100644 samples/ObjectDetection/test/BoundingBoxTests.cpp
create mode 100644 samples/ObjectDetection/test/FrameReaderTest.cpp
create mode 100644 samples/ObjectDetection/test/ImageUtilsTest.cpp
create mode 100644 samples/ObjectDetection/test/NMSTests.cpp
create mode 100644 samples/ObjectDetection/test/PipelineTest.cpp

diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
index 2ff556c8c1..ff45eecbe0 100644
--- a/samples/CMakeLists.txt
+++ b/samples/CMakeLists.txt
@@ -7,3 +7,4 @@ if(BUILD_SAMPLE_APP AND SAMPLE_DYNAMIC_BACKEND)
     add_executable(DynamicSample DynamicSample.cpp)
     target_link_libraries(DynamicSample armnn ${CMAKE_THREAD_LIBS_INIT})
 endif()
+
diff --git a/samples/ObjectDetection/CMakeLists.txt b/samples/ObjectDetection/CMakeLists.txt
new file mode 100644
index 0000000000..9e85fabe86
--- /dev/null
+++ b/samples/ObjectDetection/CMakeLists.txt
@@ -0,0 +1,66 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+cmake_minimum_required(VERSION 3.0.2)
+
+set(CMAKE_C_STANDARD 99)
+set(CMAKE_CXX_STANDARD 14)
+
+# Make the standard a requirement => prevent fallback to previous
+# supported standard
+set(CMAKE_C_STANDARD_REQUIRED ON)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# We want to pass standard C/C++ flags, without gnu extensions
+set(CMAKE_C_EXTENSIONS OFF)
+set(CMAKE_CXX_EXTENSIONS OFF)
+
+project (object_detection_example)
+
+set(CMAKE_C_FLAGS_DEBUG "-DDEBUG -O0 -g -fPIC")
+set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 -fPIC")
+
+set(CMAKE_CXX_FLAGS_DEBUG "-DDEBUG -O0 -g -fPIC")
+set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 -fPIC")
+
+include(ExternalProject)
+
+# Build in release mode by default
+if (NOT CMAKE_BUILD_TYPE STREQUAL Debug)
+    set(CMAKE_BUILD_TYPE Release CACHE INTERNAL "")
+endif()
+
+set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
+
+if (NOT DEFINED DEPENDENCIES_DIR)
+    set(DEPENDENCIES_DIR ${CMAKE_BINARY_DIR}/dependencies)
+endif()
+
+include(cmake/find_opencv.cmake)
+include(cmake/find_armnn.cmake)
+
+include_directories(include)
+
+file(GLOB SOURCES "src/*.cpp")
+list(REMOVE_ITEM SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/src/Main.cpp)
+file(GLOB TEST_SOURCES "test/*.cpp")
+file(GLOB APP_MAIN "src/Main.cpp")
+
+if(BUILD_UNIT_TESTS)
+    include(cmake/unit_tests.cmake)
+endif()
+
+
+set(APP_TARGET_NAME "${CMAKE_PROJECT_NAME}")
+
+add_executable("${APP_TARGET_NAME}" ${SOURCES} ${APP_MAIN})
+
+if (NOT OPENCV_LIBS_FOUND)
+    message("Building OpenCV libs")
+    add_dependencies("${APP_TARGET_NAME}" "${OPENCV_LIB}")
+endif()
+
+target_link_libraries("${APP_TARGET_NAME}" PUBLIC ${ARMNN_LIBS} ${OPENCV_LIBS})
+target_include_directories("${APP_TARGET_NAME}" PUBLIC ${ARMNN_INCLUDE_DIR} ${OPENCV_INCLUDE_DIR})
diff --git a/samples/ObjectDetection/Readme.md b/samples/ObjectDetection/Readme.md
new file mode 100644
index 0000000000..835bf3adeb
--- /dev/null
+++ b/samples/ObjectDetection/Readme.md
@@ -0,0 +1,453 @@
+# Object Detection Example
+
+## Introduction
+This sample application demonstrates object detection using the Arm NN public C++ API. The compiled application can take
+
+ * a video file
+
+as input and
+ * save a video file
+ * or output the video stream to a window
+
+with detections shown as bounding boxes, class labels and confidence scores.
+
+## Dependencies
+
+This example utilises OpenCV functions to capture and output video data. The top-level inference API is provided by the
+Arm NN library.
+
+### Arm NN
+
+The object detection example's build system does not trigger Arm NN compilation. Therefore, before building the application,
+please ensure that Arm NN libraries and header files are available on your build platform.
+The application executable binary dynamically links with the following Arm NN libraries:
+* libarmnn.so
+* libarmnnTfLiteParser.so
+
+The build script searches for available Arm NN libraries in the following order:
+1. Inside a custom user directory specified by the ARMNN_LIB_DIR cmake option.
+2. Inside the current Arm NN repository, assuming that Arm NN was built following [these instructions](../../BuildGuideCrossCompilation.md).
+3. Inside the default locations for system libraries, assuming Arm NN was installed from deb packages.
+
+Arm NN header files will be searched for in an `include` directory in the parent directory of the found libraries, i.e. for
+libraries found in `/usr/lib` or `/usr/lib64`, header files are expected in `/usr/include` (or `${ARMNN_LIB_DIR}/include`).
+
+Please see [find_armnn.cmake](./cmake/find_armnn.cmake) for implementation details.
+
+### OpenCV
+
+This application uses [OpenCV (Open Source Computer Vision Library)](https://opencv.org/) for video stream processing.
+Your host platform may provide OpenCV through its Linux package manager; if so, please install it in the standard way.
+If not, our build system has a script to download and cross-compile the required OpenCV modules
+as well as the [FFMPEG](https://ffmpeg.org/) and [x264 encoder](https://www.videolan.org/developers/x264.html) libraries.
+In that case only limited OpenCV functionality is built, and the application will support only video file input and
+video file output. Displaying video frames in a window requires building OpenCV with GTK and OpenGL support.
+
+The application executable binary dynamically links with the following OpenCV libraries:
+* libopencv_core.so.4.0.0
+* libopencv_imgproc.so.4.0.0
+* libopencv_imgcodecs.so.4.0.0
+* libopencv_videoio.so.4.0.0
+* libopencv_video.so.4.0.0
+* libopencv_highgui.so.4.0.0
+
+and transitively depends on:
+* libavcodec.so (FFMPEG)
+* libavformat.so (FFMPEG)
+* libavutil.so (FFMPEG)
+* libswscale.so (FFMPEG)
+* libx264.so (x264)
+
+The application searches for the above libraries in the following order:
+1. Inside a custom user directory specified by the OPENCV_LIB_DIR cmake option.
+2. Inside the default locations for system libraries.
+
+If no OpenCV libraries are found, the cross-compilation build is extended with x264, FFMPEG and OpenCV compilation steps.
+
+Note: the native build does not add third-party libraries to the compilation.
+
+Please see [find_opencv.cmake](./cmake/find_opencv.cmake) for implementation details.
+
+## Building
+There are two flows for building this application:
+* native build on a host platform,
+* cross-compilation for an Arm-based host platform.
+
+### Build Options
+
+* CMAKE_TOOLCHAIN_FILE - choose one of the available cross-compilation toolchain files:
+    * `cmake/aarch64-toolchain.cmake`
+    * `cmake/arm-linux-gnueabihf-toolchain.cmake`
+* ARMNN_LIB_DIR - points to the custom location of the Arm NN libs and headers.
+* OPENCV_LIB_DIR - points to the custom location of the OpenCV libs and headers.
+* BUILD_UNIT_TESTS - set to `1` to build tests. In addition to the main application, a separate
+`object_detection_example-tests` unit test executable will be created.
+
+### Native Build
+To build this application on a host platform, first ensure that the required dependencies are installed.
+For example, on a Raspberry Pi:
+```commandline
+sudo apt-get update
+sudo apt-get -yq install pkg-config
+sudo apt-get -yq install libgtk2.0-dev zlib1g-dev libjpeg-dev libpng-dev libxvidcore-dev libx264-dev
+sudo apt-get -yq install libavcodec-dev libavformat-dev libswscale-dev
+```
+
+To build the demo application, create a build directory:
+```commandline
+mkdir build
+cd build
+```
+If you have already installed Arm NN and OpenCV:
+
+Inside the build directory, run the cmake and make commands:
+```commandline
+cmake ..
+make
+```
+This will build the following in the bin directory:
+* object_detection_example - application executable
+
+If Arm NN and OpenCV are installed in custom locations, use the `OPENCV_LIB_DIR` and `ARMNN_LIB_DIR` options:
+```commandline
+cmake -DARMNN_LIB_DIR=/path/to/armnn -DOPENCV_LIB_DIR=/path/to/opencv ..
+make
+```
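+
+If you also enabled `BUILD_UNIT_TESTS=1` (see Build Options above), the test binary is built alongside the main
+application. As a sketch, assuming the default `bin` runtime output directory configured by this project's CMakeLists:
+```commandline
+cmake -DBUILD_UNIT_TESTS=1 ..
+make
+./bin/object_detection_example-tests
+```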
+
+### Cross-compilation
+
+This section explains how to cross-compile the application and its dependencies on a Linux x86 machine
+for Arm host platforms.
+
+You will require a working cross-compilation toolchain supported by your host platform. For Raspberry Pi 3 and 4 with a glibc
+runtime version of 2.28, the following toolchains were successfully used:
+* https://releases.linaro.org/components/toolchain/binaries/latest-7/aarch64-linux-gnu/
+* https://releases.linaro.org/components/toolchain/binaries/latest-7/arm-linux-gnueabihf/
+
+Choose aarch64-linux-gnu if the `lscpu` command shows the architecture as aarch64, or arm-linux-gnueabihf if the detected
+architecture is armv7l.
+
+You can check the glibc runtime version on your host platform by running:
+```
+ldd --version
+```
+On the **build machine**, install the C and C++ cross-compiler toolchains and add them to the PATH variable.
+
+Install the package dependencies:
+```commandline
+sudo apt-get update
+sudo apt-get -yq install pkg-config
+```
+pkg-config is required by the OpenCV build to discover the FFMPEG libs.
+
+To build the demo application, create a build directory:
+```commandline
+mkdir build
+cd build
+```
+Inside the build directory, run the cmake and make commands:
+
+**Arm 32bit**
+```commandline
+cmake -DARMNN_LIB_DIR=<path-to-armnn-libs> -DCMAKE_TOOLCHAIN_FILE=cmake/arm-linux-gnueabihf-toolchain.cmake ..
+make
+```
+**Arm 64bit**
+```commandline
+cmake -DARMNN_LIB_DIR=<path-to-armnn-libs> -DCMAKE_TOOLCHAIN_FILE=cmake/aarch64-toolchain.cmake ..
+make
+```
+
+Add the `-j` flag to the make command to run compilation in multiple threads.
+
+From the build directory, copy the following to the host platform:
+* the bin directory - contains the object_detection_example executable,
+* the lib directory - contains the cross-compiled OpenCV, FFMPEG and x264 libraries,
+* the Arm NN libs used during compilation.
+
+The full list of libs to copy to your board after cross-compilation:
+```
+libarmnn.so
+libarmnn.so.22
+libarmnn.so.23.0
+libarmnnTfLiteParser.so
+libarmnnTfLiteParser.so.22.0
+libavcodec.so
+libavcodec.so.58
+libavcodec.so.58.54.100
+libavdevice.so
+libavdevice.so.58
+libavdevice.so.58.8.100
+libavfilter.so
+libavfilter.so.7
+libavfilter.so.7.57.100
+libavformat.so
+libavformat.so.58
+libavformat.so.58.29.100
+libavutil.so
+libavutil.so.56
+libavutil.so.56.31.100
+libopencv_core.so
+libopencv_core.so.4.0
+libopencv_core.so.4.0.0
+libopencv_highgui.so
+libopencv_highgui.so.4.0
+libopencv_highgui.so.4.0.0
+libopencv_imgcodecs.so
+libopencv_imgcodecs.so.4.0
+libopencv_imgcodecs.so.4.0.0
+libopencv_imgproc.so
+libopencv_imgproc.so.4.0
+libopencv_imgproc.so.4.0.0
+libopencv_video.so
+libopencv_video.so.4.0
+libopencv_video.so.4.0.0
+libopencv_videoio.so
+libopencv_videoio.so.4.0
+libopencv_videoio.so.4.0.0
+libpostproc.so
+libpostproc.so.55
+libpostproc.so.55.5.100
+libswresample.a
+libswresample.so
+libswresample.so.3
+libswresample.so.3.5.100
+libswscale.so
+libswscale.so.5
+libswscale.so.5.5.100
+libx264.so
+libx264.so.160
+```
+## Executing
+
+Once the application executable is built, it can be executed with the following options:
+* --video-file-path: Path to the video file to run object detection on **[REQUIRED]**
+* --model-file-path: Path to the Object Detection model to use **[REQUIRED]**
+* --label-path: Path to the label set for the provided model file **[REQUIRED]**
+* --model-name: The name of the model being used. Accepted options: SSD_MOBILE | YOLO_V3_TINY **[REQUIRED]**
+* --output-video-file-path: Path to the output video file with detections added in.
 Defaults to /tmp/output.avi **[OPTIONAL]**
+* --preferred-backends: Takes the preferred backends in preference order, separated by comma.
+                        For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc].
+                        Defaults to CpuRef **[OPTIONAL]**
+* --help: Prints all the available options to screen
+
+### Object Detection on a supplied video file
+
+To run object detection on a supplied video file and output the result to a video file:
+```commandline
+LD_LIBRARY_PATH=/path/to/armnn/libs:/path/to/opencv/libs ./object_detection_example --label-path /path/to/labels/file
+ --video-file-path /path/to/video/file --model-file-path /path/to/model/file
+ --model-name [YOLO_V3_TINY | SSD_MOBILE] --output-video-file-path /path/to/output/file
+```
+
+To run object detection on a supplied video file and output the result to a GUI window:
+```commandline
+LD_LIBRARY_PATH=/path/to/armnn/libs:/path/to/opencv/libs ./object_detection_example --label-path /path/to/labels/file
+ --video-file-path /path/to/video/file --model-file-path /path/to/model/file
+ --model-name [YOLO_V3_TINY | SSD_MOBILE]
+```
+
+This application has been verified to work against the MobileNet SSD model, which can be downloaded along with its label set from:
+* https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip
+
+---
+
+# Application Overview
+This section provides a walkthrough of the application, explaining in detail the steps:
+1. Initialisation
+    1. Reading from Video Source
+    2. Preparing Labels and Model Specific Functions
+2. Creating a Network
+    1. Creating Parser and Importing Graph
+    2. Optimizing Graph for Compute Device
+    3. Creating Input and Output Binding Information
+3. Object detection pipeline
+    1. Pre-processing the Captured Frame
+    2. Making Input and Output Tensors
+    3. Executing Inference
+    4. Postprocessing
+    5. Decoding and Processing Inference Output
+    6. Drawing Bounding Boxes
+
+
+### Initialisation
+
+##### Reading from Video Source
+After parsing user arguments, the chosen video file or stream is loaded into an OpenCV `cv::VideoCapture` object.
+We use the [`IFrameReader`](./include/IFrameReader.hpp) interface and its OpenCV-specific implementation
+[`CvVideoFrameReader`](./include/CvVideoFrameReader.hpp) in our main function to capture frames from the source using the
+`ReadFrame()` function.
+
+The `CvVideoFrameReader` object also provides information about the input video. Using this information and the application
+arguments, we create one of the implementations of the [`IFrameOutput`](./include/IFrameOutput.hpp) interface:
+[`CvVideoFileWriter`](./include/CvVideoFileWriter.hpp) or [`CvWindowOutput`](./include/CvWindowOutput.hpp).
+This object is used at the end of every loop to write the processed frame to an output video file or GUI
+window.
+`CvVideoFileWriter` uses `cv::VideoWriter` with the FFMPEG backend. `CvWindowOutput` makes use of the `cv::imshow()` function.
+
+See the `GetFrameSourceAndSink` function in [Main.cpp](./src/Main.cpp) for more details.
+
+##### Preparing Labels and Model Specific Functions
+In order to interpret the result of running inference on the loaded network, we must load the labels
+associated with the model. In the provided example code, the `AssignColourToLabel` function creates a vector of
+label/colour pairs that is ordered according to the object class index at the output node of the model. Each label is
+assigned a randomly generated RGB colour. This ensures that each class has a unique colour, which proves helpful when
+plotting the bounding boxes of the various detected objects in a frame.
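+
+As an illustration, the sketch below shows one way such a label/colour table can be produced. The function name and
+types here are hypothetical; the actual logic lives in the `AssignColourToLabel` function in [Main.cpp](./src/Main.cpp).
+```c++
+#include <random>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+// One random RGB colour per class label, in label order; a fixed seed
+// keeps the colours stable between runs.
+std::vector<std::pair<std::string, std::tuple<int, int, int>>>
+MakeLabelColourTable(const std::vector<std::string>& labels)
+{
+    std::mt19937 generator(42);
+    std::uniform_int_distribution<int> channel(0, 255);
+    std::vector<std::pair<std::string, std::tuple<int, int, int>>> table;
+    for (const auto& label : labels)
+    {
+        table.emplace_back(label, std::make_tuple(channel(generator),
+                                                  channel(generator),
+                                                  channel(generator)));
+    }
+    return table;
+}
+```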
+
+Depending on the model being used, the `CreatePipeline` function returns a specific implementation of the object detection
+pipeline.
+
+### Creating a Network
+
+All operations with Arm NN and networks are encapsulated in the [`ArmnnNetworkExecutor`](./include/ArmnnNetworkExecutor.hpp)
+class.
+
+##### Creating Parser and Importing Graph
+The first step with the Arm NN SDK is to import a graph from file by using the appropriate parser.
+
+The Arm NN SDK provides parsers for reading graphs from a variety of model formats. In our application we specifically
+focus on `.tflite, .pb, .onnx` models.
+
+Based on the extension of the provided model file, the corresponding parser is created and the network file loaded with
+the `CreateNetworkFromBinaryFile()` method. The parser handles the creation of the underlying Arm NN graph.
+
+The current example accepts tflite format model files, so we use `ITfLiteParser`:
+```c++
+#include "armnnTfLiteParser/ITfLiteParser.hpp"
+
+armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
+armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
+```
+
+##### Optimizing Graph for Compute Device
+Arm NN supports optimized execution on multiple CPU and GPU devices. Prior to executing a graph, we must select the
+appropriate device context. We do this by creating a runtime context with default options using `IRuntime()`.
+
+For example:
+```c++
+#include "armnn/ArmNN.hpp"
+
+auto runtime = armnn::IRuntime::Create(armnn::IRuntime::CreationOptions());
+```
+
+We can optimize the imported graph by specifying a list of backends in order of preference and implement
+backend-specific optimizations. The backends are identified by a string unique to the backend,
+for example `CpuAcc, GpuAcc, CpuRef`.
+
+For example:
+```c++
+std::vector<std::string> backends{"CpuAcc", "GpuAcc", "CpuRef"};
+```
+
+Internally and transparently, Arm NN splits the graph into subgraphs based on these backends, calls an optimize-subgraphs
+function on each of them and, if possible, substitutes the corresponding subgraph in the original graph with
+its optimized version.
+
+Using the `Optimize()` function we optimize the graph for inference and load the optimized network onto the compute
+device with `LoadNetwork()`. This function creates the backend-specific workloads
+for the layers and a backend-specific workload factory which is called to create the workloads.
+
+For example:
+```c++
+armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
+                                              backends,
+                                              runtime->GetDeviceSpec(),
+                                              armnn::OptimizerOptions());
+armnn::NetworkId networkId;
+std::string errorMessage;
+runtime->LoadNetwork(networkId, std::move(optNet), errorMessage);
+std::cerr << errorMessage << std::endl;
+```
+
+##### Creating Input and Output Binding Information
+Parsers can also be used to extract the input information for the network. By calling `GetSubgraphInputTensorNames`
+we extract all the input names and, with `GetNetworkInputBindingInfo`, bind the input points of the graph.
+For example:
+```c++
+std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
+auto inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);
+```
+The input binding information contains all the essential information about the input. It is a tuple consisting of
+integer identifiers for bindable layers (inputs, outputs) and the tensor info (data type, quantization information,
+number of dimensions, total number of elements).
+
+Similarly, we can get the output binding information for an output layer by using the parser to retrieve output
+tensor names and calling `GetNetworkOutputBindingInfo()`.
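+As a short sketch following the same pattern as the input binding snippet above (the variable names here are ours,
+not the sample's):
+```c++
+std::vector<std::string> outputNames = parser->GetSubgraphOutputTensorNames(0);
+
+std::vector<armnnTfLiteParser::BindingPointInfo> outputBindings;
+for (const std::string& name : outputNames)
+{
+    // Each entry pairs an output layer identifier with its tensor info.
+    outputBindings.push_back(parser->GetNetworkOutputBindingInfo(0, name));
+}
+```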
+
+### Object detection pipeline
+
+The generic object detection pipeline has 3 steps: perform data pre-processing, run inference and decode inference results
+in the post-processing step.
+
+See [`ObjDetectionPipeline`](./include/NetworkPipeline.hpp) and the implementations for [`MobileNetSSDv1`](./include/NetworkPipeline.hpp)
+and [`YoloV3Tiny`](./include/NetworkPipeline.hpp) for more details.
+
+#### Pre-processing the Captured Frame
+Each frame captured from the source is read as a `cv::Mat` in BGR format, but its channels are swapped to RGB in the frame
+reader code.
+
+```c++
+cv::Mat processed;
+...
+objectDetectionPipeline->PreProcessing(frame, processed);
+```
+
+The pre-processing step consists of resizing the frame to the required resolution, padding, and converting the data type
+to match the model input layer.
+For example, the SSD MobileNet V1 model used in this example takes as input a tensor of shape `[1, 300, 300, 3]` and
+data type `uint8`.
+
+The pre-processing step returns a `cv::Mat` object containing the data ready for inference.
+
+#### Executing Inference
+```c++
+od::InferenceResults results;
+...
+objectDetectionPipeline->Inference(processed, results);
+```
+The inference step calls the `ArmnnNetworkExecutor::Run` method, which prepares the input tensors and executes inference.
+A compute device performs inference for the loaded network using the `EnqueueWorkload()` function of the runtime context.
+For example:
+```c++
+//const void* inputData = ...;
+//outputTensors were pre-allocated before
+
+armnn::InputTensors inputTensors = {{ inputBindingInfo.first, armnn::ConstTensor(inputBindingInfo.second, inputData)}};
+runtime->EnqueueWorkload(0, inputTensors, outputTensors);
+```
+We allocate memory for the output data once and map it to output tensor objects. After successful inference, we read data
+from the pre-allocated output data buffer. See [`ArmnnNetworkExecutor::ArmnnNetworkExecutor`](./src/ArmnnNetworkExecutor.cpp)
+and [`ArmnnNetworkExecutor::Run`](./src/ArmnnNetworkExecutor.cpp) for more details.
+
+#### Postprocessing
+
+##### Decoding and Processing Inference Output
+The output from inference must be decoded to obtain information about the detected objects in the frame. The example
+contains implementations for two networks, but you may also implement your own network decoding solution here.
+
+For SSD MobileNet V1 models, we decode the results to obtain the bounding box positions, classification index,
+confidence and number of detections in the input frame.
+See [`SSDResultDecoder`](./include/SSDResultDecoder.hpp) for more details.
+
+For YOLO V3 Tiny models, we decode the output and perform non-maximum suppression to filter out any weak detections
+below a confidence threshold and any redundant bounding boxes above an intersection-over-union threshold.
+See [`YoloResultDecoder`](./include/YoloResultDecoder.hpp) for more details.
+
+You are encouraged to experiment with the confidence and intersection-over-union (IoU) threshold values
+to achieve the best visual results.
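+
+To illustrate the IoU part of that filtering, here is a minimal sketch (a hypothetical helper, not the sample's API;
+see [`NonMaxSuppression`](./include/NonMaxSuppression.hpp) for the actual implementation):
+```c++
+#include <algorithm>
+#include <array>
+
+// Intersection-over-union of two axis-aligned boxes given as
+// [x_min, y_min, x_max, y_max]. NMS suppresses a box whose IoU with a
+// higher-scoring box exceeds the chosen threshold.
+float IntersectionOverUnion(const std::array<float, 4>& a, const std::array<float, 4>& b)
+{
+    const float interW = std::min(a[2], b[2]) - std::max(a[0], b[0]);
+    const float interH = std::min(a[3], b[3]) - std::max(a[1], b[1]);
+    const float inter  = std::max(0.0f, interW) * std::max(0.0f, interH);
+    const float areaA  = (a[2] - a[0]) * (a[3] - a[1]);
+    const float areaB  = (b[2] - b[0]) * (b[3] - b[1]);
+    const float uni    = areaA + areaB - inter;
+    return uni > 0.0f ? inter / uni : 0.0f;
+}
+```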
+
+The detection results are always returned as a vector of [`DetectedObject`](./include/DetectedObject.hpp),
+with the box positions list containing bounding box coordinates in the form `[x_min, y_min, x_max, y_max]`.
+
+#### Drawing Bounding Boxes
+The post-processing step accepts a callback function to be invoked when the decoding is finished. We use it
+to draw detections on the initial frame.
+With the obtained detections and the [`AddInferenceOutputToFrame`](./src/ImageUtils.cpp) function, we are able to draw bounding boxes around
+detected objects and add the associated label and confidence score.
+```c++
+//results - inference output
+objectDetectionPipeline->PostProcessing(results, [&frame, &labels](od::DetectedObjects detects) -> void {
+        AddInferenceOutputToFrame(detects, *frame, labels);
+    });
+```
+The processed frames are written to a file or displayed in a separate window.
\ No newline at end of file
diff --git a/samples/ObjectDetection/cmake/aarch64-toolchain.cmake b/samples/ObjectDetection/cmake/aarch64-toolchain.cmake
new file mode 100644
index 0000000000..bdd02f88c0
--- /dev/null
+++ b/samples/ObjectDetection/cmake/aarch64-toolchain.cmake
@@ -0,0 +1,20 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+# specify the cross compiler
+set(GNU_MACHINE "aarch64-linux-gnu")
+set(CROSS_PREFIX "aarch64-linux-gnu-")
+
+set(CMAKE_C_COMPILER ${CROSS_PREFIX}gcc)
+set(CMAKE_CXX_COMPILER ${CROSS_PREFIX}g++)
+set(CMAKE_AR ${CROSS_PREFIX}ar)
+set(CMAKE_STRIP ${CROSS_PREFIX}strip)
+set(CMAKE_LINKER ${CROSS_PREFIX}ld)
+
+set(CMAKE_CROSSCOMPILING true)
+set(CMAKE_SYSTEM_NAME Linux)
+
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+
+set(OPENCV_EXTRA_ARGS "-DENABLE_NEON=ON"
+                      "-DCMAKE_TOOLCHAIN_FILE=platforms/linux/aarch64-gnu.toolchain.cmake")
\ No newline at end of file
diff --git a/samples/ObjectDetection/cmake/arm-linux-gnueabihf-toolchain.cmake b/samples/ObjectDetection/cmake/arm-linux-gnueabihf-toolchain.cmake
new file mode 100644
index 0000000000..f66b964c35
--- /dev/null
+++ b/samples/ObjectDetection/cmake/arm-linux-gnueabihf-toolchain.cmake
@@ -0,0 +1,20 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+# specify the cross compiler
+set(GNU_MACHINE "arm-linux-gnueabihf")
+set(CROSS_PREFIX "arm-linux-gnueabihf-")
+
+set(CMAKE_C_COMPILER ${CROSS_PREFIX}gcc)
+set(CMAKE_CXX_COMPILER ${CROSS_PREFIX}g++)
+set(CMAKE_AR ${CROSS_PREFIX}ar)
+set(CMAKE_STRIP ${CROSS_PREFIX}strip)
+set(CMAKE_LINKER ${CROSS_PREFIX}ld)
+
+set(CMAKE_CROSSCOMPILING true)
+set(CMAKE_SYSTEM_NAME Linux)
+
+set(CMAKE_SYSTEM_PROCESSOR arm)
+
+set(OPENCV_EXTRA_ARGS "-DENABLE_NEON=ON"
+                      "-DCMAKE_TOOLCHAIN_FILE=platforms/linux/arm.toolchain.cmake")
\ No newline at end of file
diff --git a/samples/ObjectDetection/cmake/find_armnn.cmake b/samples/ObjectDetection/cmake/find_armnn.cmake
new file mode 100644
index 0000000000..289e9127f6
--- /dev/null
+++ b/samples/ObjectDetection/cmake/find_armnn.cmake
@@ -0,0 +1,35 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+# Search for ArmNN built libraries in user-provided path first, then current repository, then system
+
+set(ARMNN_LIB_NAMES "libarmnn.so"
+                    "libarmnnTfLiteParser.so")
+
+set(ARMNN_LIBS "")
+
+get_filename_component(PARENT_DIR ${PROJECT_SOURCE_DIR} DIRECTORY)
+get_filename_component(REPO_DIR ${PARENT_DIR} DIRECTORY)
+
+foreach(armnn_lib ${ARMNN_LIB_NAMES})
+    find_library(ARMNN_${armnn_lib}
+        NAMES
+            ${armnn_lib}
+        HINTS
+            ${ARMNN_LIB_DIR} ${REPO_DIR}
+        PATHS
+            ${ARMNN_LIB_DIR} ${REPO_DIR}
+        PATH_SUFFIXES
+            "lib"
+            "lib64")
+    if(ARMNN_${armnn_lib})
+        message("Found library ${ARMNN_${armnn_lib}}")
+        list(APPEND ARMNN_LIBS ${ARMNN_${armnn_lib}})
+        get_filename_component(LIB_DIR ${ARMNN_${armnn_lib}} DIRECTORY)
+        get_filename_component(LIB_PARENT_DIR ${LIB_DIR} DIRECTORY)
+        set(ARMNN_INCLUDE_DIR ${LIB_PARENT_DIR}/include)
+    endif()
+endforeach()
+
+if(NOT ARMNN_LIBS)
+    message(FATAL_ERROR "Could not find ArmNN libraries ${ARMNN_LIB_NAMES}")
+endif()
diff --git a/samples/ObjectDetection/cmake/find_catch.cmake b/samples/ObjectDetection/cmake/find_catch.cmake
new file mode 100644
index 0000000000..584b8073bd
--- /dev/null
+++ b/samples/ObjectDetection/cmake/find_catch.cmake
@@ -0,0 +1,16 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+#Test TPIP
+set(TEST_TPIP ${DEPENDENCIES_DIR}/test)
+file(MAKE_DIRECTORY ${TEST_TPIP})
+set(TEST_TPIP_INCLUDE ${TEST_TPIP}/include)
+file(MAKE_DIRECTORY ${TEST_TPIP_INCLUDE})
+
+ExternalProject_Add(catch2-headers
+    URL https://github.com/catchorg/Catch2/releases/download/v2.11.1/catch.hpp
+    DOWNLOAD_NO_EXTRACT 1
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ${CMAKE_COMMAND} -E copy <DOWNLOAD_DIR>/catch.hpp ${TEST_TPIP_INCLUDE}
+    INSTALL_COMMAND ""
+    )
\ No newline at end of file
diff --git a/samples/ObjectDetection/cmake/find_opencv.cmake b/samples/ObjectDetection/cmake/find_opencv.cmake
new file mode 100644
index 0000000000..4d26953d74
--- /dev/null
+++ b/samples/ObjectDetection/cmake/find_opencv.cmake
@@ -0,0 +1,204 @@
+# Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+
+set(OPENCV_VERSION 4.0.0)
+set(FFMPEG_VERSION 4.2.1)
+set(LIBX264_VERSION stable)
+
+set(OPENCV_LIB OpenCV${OPENCV_VERSION})
+set(FFMPEG_LIB ffmpeg${FFMPEG_VERSION})
+set(X264_LIB x264${LIBX264_VERSION})
+
+set(OPENCV_NAMES
+    libopencv_core.so.${OPENCV_VERSION}
+    libopencv_imgproc.so.${OPENCV_VERSION}
+    libopencv_imgcodecs.so.${OPENCV_VERSION}
+    libopencv_videoio.so.${OPENCV_VERSION}
+    libopencv_video.so.${OPENCV_VERSION}
+    libopencv_highgui.so.${OPENCV_VERSION})
+
+set(OPENCV_LIBS)
+set(FFMPEG_LIBS)
+
+foreach(opencv_lib ${OPENCV_NAMES})
+    find_library(OPENCV_${opencv_lib}
+        NAMES
+            ${opencv_lib}
+        HINTS
+            ${OPENCV_LIB_DIR}
+        PATHS
+            ${OPENCV_LIB_DIR}
+        PATH_SUFFIXES
+            "lib"
+            "lib64")
+    if(OPENCV_${opencv_lib})
+        message("Found library ${OPENCV_${opencv_lib}}")
+        list(APPEND OPENCV_LIBS ${OPENCV_${opencv_lib}})
+        get_filename_component(OPENCV_LIB_DIR ${OPENCV_${opencv_lib}} DIRECTORY)
+        get_filename_component(OPENCV_ROOT_DIR ${OPENCV_LIB_DIR} DIRECTORY)
+        set(OPENCV_INCLUDE_DIR ${OPENCV_ROOT_DIR}/include/opencv4)
+    endif()
+endforeach()
+
+if(OPENCV_LIBS)
+    message("OpenCV libraries found")
+    set(OPENCV_LIBS_FOUND TRUE)
+else()
+    set(OPENCV_ROOT_DIR ${DEPENDENCIES_DIR}/opencv)
+    set(OPENCV_DEPENDENCIES_ARGS)
+    set(OPENCV_EXTRA_LINKER_ARGS)
+    set(OPENCV_PKGCONFIG)
+
+    if(CMAKE_CROSSCOMPILING)
+        set(FFMPEG_ROOT_DIR ${DEPENDENCIES_DIR}/ffmpeg)
+        set(LIBX264_ROOT_DIR ${DEPENDENCIES_DIR}/x264)
+
+        if (CMAKE_BUILD_TYPE STREQUAL Debug)
+            set(CONFIGURE_DEBUG --enable-debug)
+            set(OPENCV_DEBUG "-DBUILD_WITH_DEBUG_INFO=ON")
+        endif()
+
+
+        ExternalProject_Add(${X264_LIB}
+            URL "https://code.videolan.org/videolan/x264/-/archive/${LIBX264_VERSION}/x264-${LIBX264_VERSION}.tar.gz"
+            URL_HASH MD5=07eae2352f6ba201999be45fece0c26c
+            DOWNLOAD_DIR ${LIBX264_ROOT_DIR}
+            PREFIX ${LIBX264_ROOT_DIR}
+            CONFIGURE_COMMAND <SOURCE_DIR>/configure
+                --host=${GNU_MACHINE}
+                --enable-static
+                --enable-shared
+                --cross-prefix=${CROSS_PREFIX}
+                --prefix=${CMAKE_BINARY_DIR}
+                --extra-ldflags=-static-libstdc++
+                --extra-cflags=-fPIC
+                ${CONFIGURE_DEBUG}
+            INSTALL_DIR ${CMAKE_BINARY_DIR}
+            BUILD_COMMAND $(MAKE)
+            INSTALL_COMMAND $(MAKE) install
+            )
+
+        set(FFMPEG_Config
+            --enable-shared
+            --enable-cross-compile
+            --cross-prefix=${CROSS_PREFIX}
+            --arch=${CMAKE_SYSTEM_PROCESSOR}
+            --target-os=linux
+            --prefix=${CMAKE_BINARY_DIR}
+            --enable-gpl
+            --enable-nonfree
+            --enable-libx264
+            --extra-cflags=-I${CMAKE_BINARY_DIR}/include
+            --extra-cflags=-fPIC
+            --extra-ldflags=-L${CMAKE_BINARY_DIR}/lib
+            --extra-libs=-ldl
+            --extra-libs=-static-libstdc++
+        )
+
+        ExternalProject_Add(${FFMPEG_LIB}
+            URL "https://github.com/FFmpeg/FFmpeg/archive/n${FFMPEG_VERSION}.tar.gz"
+            URL_HASH MD5=05792c611d1e3ebdf2c7003ff4467390
+            DOWNLOAD_DIR ${FFMPEG_ROOT_DIR}
+            PREFIX ${FFMPEG_ROOT_DIR}
+            CONFIGURE_COMMAND <SOURCE_DIR>/configure ${FFMPEG_Config} ${CONFIGURE_DEBUG}
+            INSTALL_DIR ${CMAKE_BINARY_DIR}
+            BUILD_COMMAND $(MAKE) VERBOSE=1
+            INSTALL_COMMAND $(MAKE) install
+            )
+
+        set(OPENCV_DEPENDENCIES_ARGS "-static-libstdc++ -Wl,-rpath,${CMAKE_BINARY_DIR}/lib")
+        set(OPENCV_EXTRA_LINKER_ARGS "-DOPENCV_EXTRA_EXE_LINKER_FLAGS=${OPENCV_DEPENDENCIES_ARGS}")
+
+        set(OPENCV_PKGCONFIG "PKG_CONFIG_LIBDIR=${CMAKE_BINARY_DIR}/lib/pkgconfig")
+
+        set(FFMPEG_NAMES
+            libavcodec.so
+            libavformat.so
+            libavutil.so
+            libswscale.so
+            )
+
+        foreach(ffmpeg_lib ${FFMPEG_NAMES})
+            add_library(FFMPEG_${ffmpeg_lib} SHARED IMPORTED)
+            set_target_properties(FFMPEG_${ffmpeg_lib} PROPERTIES IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/lib/${ffmpeg_lib})
+            list(APPEND
OPENCV_LIBS FFMPEG_${ffmpeg_lib}) + endforeach() + + add_library(X264_lib264.so SHARED IMPORTED) + set_target_properties(X264_lib264.so PROPERTIES IMPORTED_LOCATION ${CMAKE_BINARY_DIR}/lib/libx264.so) + list(APPEND OPENCV_LIBS X264_lib264.so) + endif() + + set(OPENCV_CMAKE_ARGS + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_C_FLAGS=-fPIC + -DCMAKE_CXX_FLAGS=-fPIC + -DWITH_GTK=OFF + -DWITH_JPEG=ON + -DWITH_IPP=OFF + -DBUILD_opencv_java_bindings_generator=OFF + -DBUILD_opencv_ml=OFF + -DBUILD_opencv_objdetect=OFF + -DBUILD_opencv_photo=OFF + -DBUILD_opencv_python_bindings_generator=OFF + -DBUILD_opencv_stitching=OFF + -DBUILD_opencv_gapi=OFF + -DBUILD_opencv_features2d=OFF + -DBUILD_opencv_dnn=OFF + -DBUILD_opencv_flann=OFF + -DBUILD_opencv_calib3d=OFF + -DBUILD_opencv_python2=OFF + -DBUILD_opencv_python3=OFF + -DBUILD_opencv_java=OFF + -DBUILD_opencv_js=OFF + -DBUILD_opencv_ts=OFF + -DBUILD_JPEG=ON + -DBUILD_JPEG_TURBO_DISABLE=ON + -DBUILD_PNG=ON + -DBUILD_TIFF=ON + -DZLIB_FOUND=OFF + -DBUILD_ZLIB=ON + -DBUILD_PERF_TESTS=OFF + -DBUILD_TESTS=OFF + -DBUILD_DOCS=OFF + -DBUILD_opencv_apps=OFF + -DBUILD_EXAMPLES=OFF + -DWITH_V4L=ON + -DWITH_LIBV4L=OFF + -DWITH_FFMPEG=ON + -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_INSTALL_RPATH=\$ORIGIN:\$ORIGIN/lib:\$ORIGIN/../lib + -DCMAKE_SHARED_LINKER_FLAGS=-static-libstdc++ + ${OPENCV_DEBUG} + ) + + ExternalProject_Add(${OPENCV_LIB} + URL "https://codeload.github.com/opencv/opencv/tar.gz/${OPENCV_VERSION}" + URL_HASH MD5=f051c1ff7b327b60123d71b53801b316 + DOWNLOAD_DIR ${OPENCV_ROOT_DIR} + PREFIX ${OPENCV_ROOT_DIR} + CONFIGURE_COMMAND ${OPENCV_PKGCONFIG} + ${CMAKE_COMMAND} ${OPENCV_CMAKE_ARGS} ${OPENCV_EXTRA_ARGS} + ${OPENCV_EXTRA_LINKER_ARGS} ${OPENCV_ROOT_DIR}/src/${OPENCV_LIB} + INSTALL_DIR ${CMAKE_BINARY_DIR} + BUILD_COMMAND $(MAKE) + INSTALL_COMMAND $(MAKE) install + ) + + if(CMAKE_CROSSCOMPILING) + ExternalProject_Add_StepDependencies(${FFMPEG_LIB} build ${X264_LIB}) + ExternalProject_Add_StepDependencies(${OPENCV_LIB} build ${FFMPEG_LIB}) + endif() + + set(OPENCV_INCLUDE_DIR ${CMAKE_BINARY_DIR}/include/opencv4) + set(OPENCV_LIB_DIR ${CMAKE_BINARY_DIR}/lib) + + foreach(opencv_lib ${OPENCV_NAMES}) + add_library(OPENCV_${opencv_lib} SHARED IMPORTED) + set_target_properties(OPENCV_${opencv_lib} PROPERTIES IMPORTED_LOCATION ${OPENCV_LIB_DIR}/${opencv_lib}) + list(APPEND OPENCV_LIBS OPENCV_${opencv_lib}) + endforeach() + +endif() \ No newline at end of file diff --git a/samples/ObjectDetection/cmake/unit_tests.cmake b/samples/ObjectDetection/cmake/unit_tests.cmake new file mode 100644 index 0000000000..dcfa512893 --- /dev/null +++ b/samples/ObjectDetection/cmake/unit_tests.cmake @@ -0,0 +1,65 @@ +# Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+# SPDX-License-Identifier: MIT
+
+set(TEST_RESOURCES_DIR ${CMAKE_SOURCE_DIR}/test/resources)
+add_definitions (-DTEST_RESOURCE_DIR="${TEST_RESOURCES_DIR}")
+set(TEST_TARGET_NAME "${CMAKE_PROJECT_NAME}-tests")
+
+file(GLOB TEST_SOURCES "test/*")
+
+include(cmake/find_catch.cmake)
+
+file(DOWNLOAD "https://storage.googleapis.com/download.tensorflow.org/models/tflite/coco_ssd_mobilenet_v1_1.0_quant_2018_06_29.zip"
+    ${CMAKE_CURRENT_SOURCE_DIR}/test/resources/models.zip SHOW_PROGRESS)
+
+# Extract
+execute_process(
+    COMMAND ${CMAKE_COMMAND} -E tar xf models.zip
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/test/resources/
+    RESULT_VARIABLE return_code
+)
+
+ExternalProject_Add(basketball-image
+    URL https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/basketball1.png
+    DOWNLOAD_NO_EXTRACT 1
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ${CMAKE_COMMAND} -E copy <DOWNLOAD_DIR>/basketball1.png ${CMAKE_CURRENT_SOURCE_DIR}/test/resources
+    INSTALL_COMMAND ""
+)
+
+ExternalProject_Add(messi
+    URL https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/messi5.jpg
+    DOWNLOAD_NO_EXTRACT 1
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ${CMAKE_COMMAND} -E copy <DOWNLOAD_DIR>/messi5.jpg ${CMAKE_CURRENT_SOURCE_DIR}/test/resources
+    INSTALL_COMMAND ""
+    )
+
+ExternalProject_Add(vtest
+    URL https://raw.githubusercontent.com/opencv/opencv/4.0.0/samples/data/Megamind.avi
+    DOWNLOAD_NO_EXTRACT 1
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ${CMAKE_COMMAND} -E copy <DOWNLOAD_DIR>/Megamind.avi ${CMAKE_CURRENT_SOURCE_DIR}/test/resources
+    INSTALL_COMMAND ""
+    )
+
+add_executable("${TEST_TARGET_NAME}" ${SOURCES} ${TEST_SOURCES})
+
+add_dependencies(
+    "${TEST_TARGET_NAME}"
+    "catch2-headers"
+    "vtest"
+    "messi"
+    "basketball-image"
+)
+
+if (NOT OPENCV_LIBS_FOUND)
+    message("Building OpenCV libs")
+    add_dependencies("${TEST_TARGET_NAME}" "${OPENCV_LIB}")
+endif()
+
+target_include_directories("${TEST_TARGET_NAME}" PUBLIC ${TEST_TPIP_INCLUDE}
+    ${ARMNN_INCLUDE_DIR}
+    ${OPENCV_INCLUDE_DIR} ${DEPENDENCIES_DIR} ${TEST_RESOURCES_DIR})
+
+target_link_libraries("${TEST_TARGET_NAME}" PUBLIC ${ARMNN_LIBS} ${OPENCV_LIBS} ${FFMPEG_LIBS})
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp b/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp
new file mode 100644
index 0000000000..c75b68bbe1
--- /dev/null
+++ b/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Types.hpp"
+
+#include "armnn/ArmNN.hpp"
+#include "armnnTfLiteParser/ITfLiteParser.hpp"
+#include "armnnUtils/DataLayoutIndexed.hpp"
+#include <type_traits>
+
+#include <string>
+#include <vector>
+
+namespace od
+{
+/**
+* @brief Used to load in a network through ArmNN and run inference on it against a given backend.
+*
+*/
+class ArmnnNetworkExecutor
+{
+private:
+    armnn::IRuntimePtr m_Runtime;
+    armnn::NetworkId m_NetId{};
+    mutable InferenceResults m_OutputBuffer;
+    armnn::InputTensors m_InputTensors;
+    armnn::OutputTensors m_OutputTensors;
+    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;
+
+    std::vector<std::string> m_outputLayerNamesList;
+
+    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;
+
+    void PrepareTensors(const void* inputData, const size_t dataBytes);
+
+    template <typename Enumeration>
+    auto log_as_int(Enumeration value)
+    -> typename std::underlying_type<Enumeration>::type
+    {
+        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
+    }
+
+public:
+    ArmnnNetworkExecutor() = delete;
+
+    /**
+    * @brief Initializes the network with the given input data.
 Parsed through TfLiteParser and optimized for a
+    * given backend.
+    *
+    * Note that the order of the output layer names in m_outputLayerNamesList affects the order of the feature vectors
+    * in the output of the Run method.
+    *
+    * @param[in] modelPath - Relative path to the model file
+    * @param[in] backends - The list of preferred backends to run inference on
+    */
+    ArmnnNetworkExecutor(std::string& modelPath,
+                         std::vector<armnn::BackendId>& backends);
+
+    /**
+    * @brief Returns the aspect ratio of the associated model in the order of width, height.
+    */
+    Size GetImageAspectRatio();
+
+    armnn::DataType GetInputDataType() const;
+
+    /**
+    * @brief Runs inference on the provided input data, and stores the results in the provided InferenceResults object.
+    *
+    * @param[in] inputData - input frame data
+    * @param[in] dataBytes - input data size in bytes
+    * @param[out] results - Vector of DetectionResult objects used to store the output result.
+    */
+    bool Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults);
+
+};
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/BoundingBox.hpp b/samples/ObjectDetection/include/BoundingBox.hpp
new file mode 100644
index 0000000000..2b790401db
--- /dev/null
+++ b/samples/ObjectDetection/include/BoundingBox.hpp
@@ -0,0 +1,108 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+namespace od
+{
+/**
+* @brief Class used to store and receive bounding box location and size information
+*
+*/
+class BoundingBox
+{
+public:
+    /**
+    * @brief Default constructor
+    */
+    BoundingBox();
+
+    /**
+    * @brief Constructor with parameters to configure the bounding box dimensions
+    * @param[in]  x int value representing the x coordinate.
+    * @param[in]  y int value representing the y coordinate.
+    * @param[in]  width unsigned int value representing the width value.
+    * @param[in]  height unsigned int value representing the height value.
+    */
+    BoundingBox(int x, int y, unsigned int width, unsigned int height);
+
+    /**
+    * @brief Constructor with a BoundingBox type parameter to copy from.
+    * @param[in]  other Bounding box to copy.
+    */
+    BoundingBox(const BoundingBox& other);
+
+    ~BoundingBox() = default;
+
+    /**
+    * @brief Function to retrieve the X coordinate.
+    */
+    int GetX() const;
+
+    /**
+    * @brief Function to retrieve the Y coordinate.
+    */
+    int GetY() const;
+
+    /**
+    * @brief Function to retrieve the width.
+    */
+    unsigned int GetWidth() const;
+
+    /**
+    * @brief Function to retrieve the height.
+    */
+    unsigned int GetHeight() const;
+
+    /**
+    * @brief Function to set the X coordinate.
+    * @param[in]  x int value representing x coordinate
+    */
+    void SetX(int x);
+
+    /**
+    * @brief Function to set the Y coordinate.
+    * @param[in]  y int value representing y coordinate
+    */
+    void SetY(int y);
+
+    /**
+    * @brief Function to set the width of the BoundingBox.
+    * @param[in]  width int value representing the width
+    */
+    void SetWidth(unsigned int width);
+
+    /**
+    * @brief Function to set the height of the BoundingBox.
+    * @param[in]  height int value representing the height
+    */
+    void SetHeight(unsigned int height);
+
+    /**
+    * @brief Assignment operator: copies the fields of another BoundingBox
+    * @param[in]  other BoundingBox to copy from
+    */
+    BoundingBox& operator=(const BoundingBox& other);
+
+private:
+    int m_X;
+    int m_Y;
+    unsigned int m_Width;
+    unsigned int m_Height;
+};
+
+/*
+ * @brief: Get a bounding box within the limits of another bounding box
+ *
+ * @param[in]  boxIn Input bounding box
+ * @param[out] boxOut Output bounding box
+ * @param[in]  boxLimits Bounding box defining the limits which the output
+ *             needs to conform to.
+ * @return none
+ */
+void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut,
+                         const BoundingBox& boxLimits);
+
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/CmdArgsParser.hpp b/samples/ObjectDetection/include/CmdArgsParser.hpp
new file mode 100644
index 0000000000..6c22e6ff6d
--- /dev/null
+++ b/samples/ObjectDetection/include/CmdArgsParser.hpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+#include <string>
+#include <map>
+#include <iostream>
+
+const std::string MODEL_NAME = "--model-name";
+const std::string VIDEO_FILE_PATH = "--video-file-path";
+const std::string MODEL_FILE_PATH = "--model-file-path";
+const std::string OUTPUT_VIDEO_FILE_PATH = "--output-video-file-path";
+const std::string LABEL_PATH = "--label-path";
+const std::string PREFERRED_BACKENDS = "--preferred-backends";
+const std::string HELP = "--help";
+
+/*
+ * The accepted options for this Object detection executable
+ */
+static std::map<std::string, std::string> CMD_OPTIONS = {
+        {VIDEO_FILE_PATH, "[REQUIRED] Path to the video file to run object detection on"},
+        {MODEL_FILE_PATH, "[REQUIRED] Path to the Object Detection model to use"},
+        {LABEL_PATH, "[REQUIRED] Path to the label set for the provided model file. "
+                     "Label file should just be an ordered list, separated by a new line."},
+        {MODEL_NAME, "[REQUIRED] The name of the model being used. Accepted options: YOLO_V3_TINY, SSD_MOBILE"},
+        {OUTPUT_VIDEO_FILE_PATH, "[OPTIONAL] Path to the output video file with detections added in. "
+                                 "If specified, the file is saved to disk; otherwise the output is displayed on screen"},
+        {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma."
+                             " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]."
+                             " Defaults to CpuAcc,CpuRef"}
+};
+
+/*
+ * Checks that a particular option was specified by the user
+ */
+bool CheckOptionSpecified(const std::map<std::string, std::string>& options, const std::string& option);
+
+
+/*
+ * Retrieves the user provided option
+ */
+std::string GetSpecifiedOption(const std::map<std::string, std::string>& options, const std::string& option);
+
+
+/*
+ * Parses all the command line options provided by the user and stores in a map.
+ */
+int ParseOptions(std::map<std::string, std::string>& options, std::map<std::string, std::string>& acceptedOptions,
+                 char *argv[], int argc);
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/CvVideoFileWriter.hpp b/samples/ObjectDetection/include/CvVideoFileWriter.hpp
new file mode 100644
index 0000000000..ea1501b68e
--- /dev/null
+++ b/samples/ObjectDetection/include/CvVideoFileWriter.hpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IFrameOutput.hpp"
+#include <opencv2/opencv.hpp>
+
+namespace od
+{
+
+class CvVideoFileWriter : public IFrameOutput<cv::Mat> {
+public:
+    /**
+     * @brief Default constructor.
+     *
+     * Underlying open cv video writer object will be instantiated.
+     */
+    CvVideoFileWriter() = default;
+
+    ~CvVideoFileWriter() override = default;
+
+    /**
+     * @brief Initialises video file writer.
+     *
+     * Opens opencv writer with given params. FFMPEG backend is used.
+     *
+     * @param outputVideo path to the video file.
+     * @param encoding cv::CAP_PROP_FOURCC code.
+     * @param fps target frame rate.
+     * @param width target frame width.
+     * @param height target frame height.
+     *
+     */
+    void Init(const std::string& outputVideo, int encoding, double fps, int width, int height);
+
+    /**
+     * Writes frame to the file using opencv writer.
+     *
+     * @param frame data to write.
+     */
+    void WriteFrame(std::shared_ptr<cv::Mat>& frame) override;
+
+    /**
+     * Releases opencv writer.
+     */
+    void Close() override;
+
+    /**
+     * Checks if opencv writer was successfully opened.
+     * @return true if the underlying writer is ready to be used, false otherwise.
+     */
+    bool IsReady() const override;
+
+private:
+    cv::VideoWriter m_cvWriter{};
+    bool m_ready = false;
+};
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/CvVideoFrameReader.hpp b/samples/ObjectDetection/include/CvVideoFrameReader.hpp
new file mode 100644
index 0000000000..081f92620e
--- /dev/null
+++ b/samples/ObjectDetection/include/CvVideoFrameReader.hpp
@@ -0,0 +1,108 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+
+#include "IFrameReader.hpp"
+#include <opencv2/opencv.hpp>
+
+namespace od
+{
+
+class CvVideoFrameReader :
+    public IFrameReader<cv::Mat>
+{
+public:
+    /**
+     * @brief Default constructor.
+     *
+     * Underlying open cv video capture object will be instantiated.
+     */
+    CvVideoFrameReader() = default;
+
+    ~CvVideoFrameReader() override = default;
+
+    /**
+     *@brief Initialises reader to capture frames from video file.
+     *
+     * @param source path to the video file or image sequence.
+     *
+     * @throws std::runtime_error if init failed
+     */
+    void Init(const std::string& source);
+
+    std::shared_ptr<cv::Mat> ReadFrame() override;
+
+    bool IsExhausted(const std::shared_ptr<cv::Mat>& frame) const override;
+
+    /**
+     * Returns the effective video frame width supported by the source/set by the user.
+     * Must be called after the Init method.
+     * @return frame width
+     */
+    int GetSourceWidth() const;
+
+    /**
+     * Returns the effective video frame height supported by the source/set by the user.
+     * Must be called after the Init method.
+     * @return frame height
+     */
+    int GetSourceHeight() const;
+
+    /**
+     * Returns the effective fps value supported by the source/set by the user.
+     * @return fps value
+     */
+    double GetSourceFps() const;
+
+    /**
+     * Will query OpenCV to convert images to RGB.
+     * Copy is actually the default behaviour, but the set function needs to be called
+     * in order to know whether OpenCV supports conversion from our source format.
+     * @return boolean,
+     *     true: OpenCV returns RGB
+     *     false: OpenCV returns the fourcc format from GetSourceEncoding
+     */
+    bool ConvertToRGB();
+
+    /**
+     * Returns the 4-character code of the codec.
+     * @return codec name
+     */
+    std::string GetSourceEncoding() const;
+
+    /**
+     * Get the fourcc int from its string name.
+     * @return codec int
+     */
+    int GetSourceEncodingInt() const;
+
+    int GetFrameCount() const;
+
+private:
+    cv::VideoCapture m_capture;
+
+    void CheckIsOpen(const std::string& source);
+};
+
+class CvVideoFrameReaderRgbWrapper :
+    public IFrameReader<cv::Mat>
+{
+public:
+    CvVideoFrameReaderRgbWrapper() = delete;
+    CvVideoFrameReaderRgbWrapper(const CvVideoFrameReaderRgbWrapper& o) = delete;
+    CvVideoFrameReaderRgbWrapper(CvVideoFrameReaderRgbWrapper&& o) = delete;
+
+    CvVideoFrameReaderRgbWrapper(std::unique_ptr<CvVideoFrameReader> reader);
+
+    std::shared_ptr<cv::Mat> ReadFrame() override;
+
+    bool IsExhausted(const std::shared_ptr<cv::Mat>& frame) const override;
+
+private:
+    std::unique_ptr<CvVideoFrameReader> m_reader;
+};
+
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/CvWindowOutput.hpp b/samples/ObjectDetection/include/CvWindowOutput.hpp
new file mode 100644
index 0000000000..317327ba62
--- /dev/null
+++ b/samples/ObjectDetection/include/CvWindowOutput.hpp
@@ -0,0 +1,53 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "IFrameOutput.hpp"
+#include <opencv2/opencv.hpp>
+
+namespace od
+{
+
+class CvWindowOutput : public IFrameOutput<cv::Mat> {
+public:
+
+    CvWindowOutput() = default;
+
+    ~CvWindowOutput() override = default;
+
+    /**
+     * @brief Creates a named window.
+     *
+     * Uses opencv to create a window with given name.
+     *
+     * @param windowName opencv window name.
+     *
+     */
+    void Init(const std::string& windowName);
+
+    /**
+     * Writes frame to the window.
+     *
+     * @param frame data to write.
+     */
+    void WriteFrame(std::shared_ptr<cv::Mat>& frame) override;
+
+    /**
+     * Releases all windows.
+     */
+    void Close() override;
+
+    /**
+     * Always true.
+     * @return true.
+     */
+    bool IsReady() const override;
+
+private:
+    std::string m_windowName;
+
+};
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/DetectedObject.hpp b/samples/ObjectDetection/include/DetectedObject.hpp
new file mode 100644
index 0000000000..315ebccf07
--- /dev/null
+++ b/samples/ObjectDetection/include/DetectedObject.hpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BoundingBox.hpp"
+
+#include <string>
+#include <vector>
+
+namespace od
+{
+/**
+ * An object detection network inference result decoded data representation.
+ */
+class DetectedObject
+{
+
+public:
+    DetectedObject();
+
+    /**
+     * Creates a detection with the given parameters.
+     *
+     * @param id - class id
+     * @param label - human readable text class label
+     * @param boundingBox - rectangular detection coordinates
+     * @param score - detection score/probability
+     */
+    DetectedObject(unsigned int id,
+                   std::string label,
+                   const BoundingBox& boundingBox,
+                   float score);
+
+    ~DetectedObject() = default;
+
+    /**
+     * Get class id
+     * @return id
+     */
+    unsigned int GetId() const;
+
+    /**
+     * Get human readable text class label
+     * @return label
+     */
+    const std::string& GetLabel() const;
+
+    /**
+     * Get rectangular detection coordinates
+     * @return detection coordinates
+     */
+    const BoundingBox& GetBoundingBox() const;
+
+    /**
+     * Get detection score
+     * @return score
+     */
+    float GetScore() const;
+
+    /**
+     * Set class id
+     * @param[in] id - class id
+     */
+    void SetId(unsigned int id);
+
+    /**
+     * Set class label
+     * @param[in] label - human readable text class label
+     */
+    void SetLabel(const std::string& label);
+
+    /**
+     * Set detection coordinates
+     * @param[in] boundingBox detection coordinates
+     */
+    void SetBoundingBox(const BoundingBox& boundingBox);
+
+    /**
+     * Set detection score
+     * @param[in] score - detection score
+     */
+    void SetScore(float score);
+
+private:
+    unsigned int m_Id;
+    std::string m_Label;
+    BoundingBox m_BoundingBox;
+    float m_Score;
+};
+
+using DetectedObjects = std::vector<DetectedObject>;
+
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/IDetectionResultDecoder.hpp b/samples/ObjectDetection/include/IDetectionResultDecoder.hpp
new file mode 100644
index 0000000000..c0a29df33f
--- /dev/null
+++ b/samples/ObjectDetection/include/IDetectionResultDecoder.hpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "DetectedObject.hpp"
+#include "Types.hpp"
+
+#include <vector>
+
+namespace od
+{
+
+class IDetectionResultDecoder
+{
+public:
+    /**
+     * @brief Returns decoded detected objects from a network model.
+     * @desc Outputs 4 vectors: bounding boxes, label, probabilities & number of detections.
+     *       This function decodes a network model output and converts it to the expected format.
+     *
+     * @param[in] results Vector of outputs from a model.
+     * @param[in] outputFrameSize Struct containing height & width of output frame that is displayed.
+     * @param[in] resizedFrameSize Struct containing height & width of resized input frame before padding
+     *            and inference.
+     * @param[in] labels Vector of network labels.
+     *
+     * @return Vector of decoded detected objects.
+     */
+    virtual DetectedObjects Decode(const InferenceResults& results,
+                                   const Size& outputFrameSize,
+                                   const Size& resizedFrameSize,
+                                   const std::vector<std::string>& labels) = 0;
+
+};
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/IFrameOutput.hpp b/samples/ObjectDetection/include/IFrameOutput.hpp
new file mode 100644
index 0000000000..c8b4fe5a47
--- /dev/null
+++ b/samples/ObjectDetection/include/IFrameOutput.hpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+
+namespace od
+{
+/**
+ * @brief Frames output interface
+ *
+ * @tparam FrameDataT frame container data type
+ */
+template<typename FrameDataT> class IFrameOutput
+{
+
+public:
+    /**
+     * @brief Writes frame to the selected output
+     *
+     * @param frame container
+     */
+    virtual void WriteFrame(std::shared_ptr<FrameDataT>& frame) = 0;
+
+    /**
+     * @brief Closes the frame output
+     */
+    virtual void Close() = 0;
+
+    /**
+     * @brief Checks if the frame sink is ready to write.
+     *
+     * @return True if frame sink is ready, False otherwise
+     */
+    virtual bool IsReady() const = 0;
+
+    /**
+     * @brief Default destructor
+     */
+    virtual ~IFrameOutput() = default;
+
+};
+
+}// namespace od
diff --git a/samples/ObjectDetection/include/IFrameReader.hpp b/samples/ObjectDetection/include/IFrameReader.hpp
new file mode 100644
index 0000000000..d371b7d2a5
--- /dev/null
+++ b/samples/ObjectDetection/include/IFrameReader.hpp
@@ -0,0 +1,45 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <cstddef>
+#include <memory>
+
+namespace od
+{
+/**
+ * @brief Frame source reader interface
+ *
+ * @tparam FrameDataT frame container data type
+ */
+template<typename FrameDataT> class IFrameReader
+{
+
+public:
+    /**
+     * @brief Reads the next frame from the source
+     *
+     * @return pointer to the frame container
+     */
+    virtual std::shared_ptr<FrameDataT> ReadFrame() = 0;
+
+    /**
+     * @brief Checks if the frame source has more frames to read.
+     *
+     * @param[in] frame the pointer to the last frame captured with the ReadFrame method. It could be used in
+     *                  implementation-specific logic to check the frame source state.
+     * @return True if frame source was exhausted, False otherwise
+     */
+    virtual bool IsExhausted(const std::shared_ptr<FrameDataT>& frame) const = 0;
+
+    /**
+     * @brief Default destructor
+     */
+    virtual ~IFrameReader() = default;
+
+};
+
+}// namespace od
\ No newline at end of file
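A sketch of implementing the reader interface for a source other than video, assuming OpenCV; `SingleImageReader` is a hypothetical name used only for illustration:

```cpp
#include "IFrameReader.hpp"
#include <opencv2/opencv.hpp>

// Hypothetical reader that yields one image and then reports exhaustion.
class SingleImageReader : public od::IFrameReader<cv::Mat>
{
public:
    explicit SingleImageReader(const std::string& path) : m_path(path) {}

    std::shared_ptr<cv::Mat> ReadFrame() override
    {
        if (m_done)
        {
            return std::make_shared<cv::Mat>();   // empty frame signals the end
        }
        m_done = true;
        return std::make_shared<cv::Mat>(cv::imread(m_path, cv::IMREAD_COLOR));
    }

    bool IsExhausted(const std::shared_ptr<cv::Mat>& frame) const override
    {
        return frame->empty();
    }

private:
    std::string m_path;
    bool m_done = false;
};
```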
+*/
+void PadFrame(const cv::Mat& src, cv::Mat& dest, int bottom, int right);
+
+/**
+ * Resize frame to the destination size and pad if necessary to preserve initial frame aspect ratio.
+ *
+ * @param frame input frame to resize
+ * @param dest output frame to place resized and padded result
+ * @param cache intermediate data container required by the operation
+ * @param destSize size of the destination frame
+ */
+void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize);
+
+/**
+* @brief Function to retrieve the cv::Scalar color from an RGB tuple.
+* @param[in] color the tuple form of the RGB color
+*/
+static cv::Scalar GetScalarColorCode(std::tuple<int, int, int> color);
\ No newline at end of file
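A short sketch of the resize-and-pad helper declared above; the 416x416 target is an assumption matching a typical YOLO v3 tiny input:

```cpp
#include "ImageUtils.hpp"

void PrepareInput(const cv::Mat& frame)
{
    cv::Mat resized;   // intermediate buffer required by the helper
    cv::Mat padded;    // final network-sized image
    // Scales the long edge to 416 and pads bottom/right to preserve the aspect ratio.
    ResizeWithPad(frame, padded, resized, od::Size(416, 416));
}
```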
+     *
+     * @param[in] executor - unique pointer to inference runner
+     * @param[in] NMSThreshold - non max suppression threshold for decoding step
+     * @param[in] ClsThreshold - class probability threshold for decoding step
+     * @param[in] ObjectThreshold - detected object score threshold for decoding step
+     */
+    YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor,
+               float NMSThreshold, float ClsThreshold, float ObjectThreshold);
+
+    /**
+     * @brief Yolo v3 tiny image pre-processing implementation.
+     *
+     * On top of the standard pre-processing, converts input data type according to the network input tensor data type.
+     * Supported data types: uint8 and float32.
+     *
+     * @param[in] original - input image data
+     * @param[out] processed - image data ready to be used for inference.
+     */
+    void PreProcessing(const cv::Mat& original, cv::Mat& processed);
+
+};
+
+/**
+ * Specific to MobileNet SSD v1 object detection pipeline implementation.
+ */
+class MobileNetSSDv1: public ObjDetectionPipeline {
+
+public:
+    /**
+     * Constructs object detection pipeline for MobileNet SSD network.
+     *
+     * Network input is expected to be uint8 or fp32. Data range [-1, 1].
+     * Network output is FP32.
+     *
+     * @param[in] executor - unique pointer to inference runner
+     * @param[in] objectThreshold - detected object score threshold for decoding step
+     */
+    MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor,
+                   float objectThreshold);
+
+    /**
+     * @brief MobileNet SSD image pre-processing implementation.
+     *
+     * On top of the standard pre-processing, converts input data type according to the network input tensor data type
+     * and scales input data from [0, 255] to [-1, 1] for FP32 input.
+     *
+     * Supported input data types: uint8 and float32.
+     *
+     * @param[in] original - input image data
+     * @param[out] processed - image data ready to be used for inference.
+     */
+    void PreProcessing(const cv::Mat& original, cv::Mat& processed);
+
+};
+
+using IPipelinePtr = std::unique_ptr<ObjDetectionPipeline>;
+
+/**
+ * Constructs object detection pipeline based on configuration provided.
+ *
+ * @param[in] config - object detection pipeline configuration.
+ *
+ * @return unique pointer to object detection pipeline.
+ */
+IPipelinePtr CreatePipeline(od::ODPipelineOptions& config);
+
+}// namespace od
\ No newline at end of file
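Putting the pipeline interface together, a minimal sketch assuming a local `detect.tflite` model file (the path and the single-frame helper are assumptions; the call sequence follows the class above):

```cpp
#include "NetworkPipeline.hpp"

void RunOnce(const cv::Mat& rgbFrame)
{
    od::ODPipelineOptions options;
    options.m_ModelName     = "SSD_MOBILE";
    options.m_ModelFilePath = "detect.tflite";       // assumed model location
    options.m_backends      = {"CpuAcc", "CpuRef"};

    od::IPipelinePtr pipeline = od::CreatePipeline(options);

    cv::Mat processed;
    od::InferenceResults results;
    pipeline->PreProcessing(rgbFrame, processed);
    pipeline->Inference(processed, results);
    pipeline->PostProcessing(results, [](od::DetectedObjects detects)
    {
        // consume decoded detections here
    });
}
```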
+* @param[in] inputDetections Vector of decoded detections.
+* @param[in] iouThresh Detects with IOU larger than this threshold are suppressed.
+* @return Vector of indices corresponding to input detections kept after NMS.
+*
+*/
+std::vector<int> NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh);
+
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/SSDResultDecoder.hpp b/samples/ObjectDetection/include/SSDResultDecoder.hpp
new file mode 100644
index 0000000000..65afb8d376
--- /dev/null
+++ b/samples/ObjectDetection/include/SSDResultDecoder.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "Types.hpp"
+#include "DetectedObject.hpp"
+#include "IDetectionResultDecoder.hpp"
+
+namespace od
+{
+
+class SSDResultDecoder : public IDetectionResultDecoder
+{
+public:
+    /**
+     * Constructs MobileNet ssd v1 inference results decoder.
+     *
+     * @param ObjectThreshold object score threshold
+     */
+    SSDResultDecoder(float ObjectThreshold);
+
+    DetectedObjects Decode(const InferenceResults& results,
+                           const Size& outputFrameSize,
+                           const Size& resizedFrameSize,
+                           const std::vector<std::string>& labels) override;
+
+private:
+    float m_objectThreshold;
+};
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/include/Types.hpp b/samples/ObjectDetection/include/Types.hpp
new file mode 100644
index 0000000000..801cff392a
--- /dev/null
+++ b/samples/ObjectDetection/include/Types.hpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/BackendId.hpp>
+#include <cstdint>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace od
+{
+
+struct Size
+{
+
+    uint32_t m_Width;
+    uint32_t m_Height;
+
+    Size() : Size(0, 0) {}
+
+    Size(uint32_t width, uint32_t height) :
+            m_Width{width}, m_Height{height} {}
+
+    Size(const Size& other)
+            : Size(other.m_Width, other.m_Height) {}
+
+    ~Size() = default;
+
+    Size &operator=(const Size& other) = default;
+};
+
+struct BBoxColor
+{
+    std::tuple<int, int, int> colorCode;
+};
+
+struct ODPipelineOptions
+{
+    std::string m_ModelName;
+    std::string m_ModelFilePath;
+    std::vector<armnn::BackendId> m_backends;
+};
+
+using InferenceResult = std::vector<float>;
+using InferenceResults = std::vector<InferenceResult>;
+}
\ No newline at end of file
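A small sketch of the suppression helper from NonMaxSuppression.hpp, with two overlapping boxes of the same label; the values are illustrative (IoU of these boxes is roughly 0.68, above the 0.5 threshold):

```cpp
#include "NonMaxSuppression.hpp"

void SuppressExample()
{
    od::DetectedObject strong(0, "person", od::BoundingBox(0, 0, 100, 100), 0.9f);
    od::DetectedObject weak  (0, "person", od::BoundingBox(10, 10, 100, 100), 0.4f);

    od::DetectedObjects detections{strong, weak};
    // Keeps the index of the strongest box; the overlapping weaker one is dropped.
    std::vector<int> kept = od::NonMaxSuppression(detections, 0.5f);  // kept == {0}
}
```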
+     *
+     * @param NMSThreshold non max suppression threshold
+     * @param ClsThreshold class probability threshold
+     * @param ObjectThreshold detected object score threshold
+     */
+    YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold);
+
+    DetectedObjects Decode(const InferenceResults& results,
+                           const Size& outputFrameSize,
+                           const Size& resizedFrameSize,
+                           const std::vector<std::string>& labels) override;
+private:
+    float m_NmsThreshold;
+    float m_ClsThreshold;
+    float m_objectThreshold;
+
+    unsigned int m_boxElements = 4U;
+    unsigned int m_confidenceElements = 1U;
+    unsigned int m_numClasses = 80U;
+    unsigned int m_numBoxes = 2535U;
+};
+}// namespace od
\ No newline at end of file
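Before the executor implementation below, a sketch of its expected call pattern; the constructor and `Run` signatures come from this patch, while the model path and helper name are assumptions:

```cpp
#include "ArmnnNetworkExecutor.hpp"
#include <opencv2/opencv.hpp>

void InferOnce(const cv::Mat& preprocessed)
{
    std::string model = "detect.tflite";                    // assumed model path
    std::vector<armnn::BackendId> backends = {"CpuAcc", "CpuRef"};
    od::ArmnnNetworkExecutor executor(model, backends);

    od::InferenceResults results;                           // one vector<float> per output tensor
    executor.Run(preprocessed.data,
                 preprocessed.total() * preprocessed.elemSize(),
                 results);
}
```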
diff --git a/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
new file mode 100644
index 0000000000..cb4c0c9f84
--- /dev/null
+++ b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
@@ -0,0 +1,140 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmnnNetworkExecutor.hpp"
+#include "Types.hpp"
+
+#include <cassert>
+#include <sstream>
+
+namespace od
+{
+
+armnn::DataType ArmnnNetworkExecutor::GetInputDataType() const
+{
+    return m_inputBindingInfo.second.GetDataType();
+}
+
+ArmnnNetworkExecutor::ArmnnNetworkExecutor(std::string& modelPath,
+                                           std::vector<armnn::BackendId>& preferredBackends)
+: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
+{
+    // Import the TensorFlow lite model.
+    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
+    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
+
+    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
+
+    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);
+
+    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);
+
+    for (const std::string& name : m_outputLayerNamesList)
+    {
+        m_outputBindingInfo.push_back(std::move(parser->GetNetworkOutputBindingInfo(0, name)));
+    }
+
+    std::vector<std::string> errorMessages;
+    // optimize the network.
+    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
+                                                  preferredBackends,
+                                                  m_Runtime->GetDeviceSpec(),
+                                                  armnn::OptimizerOptions(),
+                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));
+
+    if (!optNet)
+    {
+        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
+        ARMNN_LOG(error) << errorMessage;
+        throw armnn::Exception(errorMessage);
+    }
+
+    // Load the optimized network onto the m_Runtime device
+    std::string errorMessage;
+    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
+    {
+        ARMNN_LOG(error) << errorMessage;
+    }
+
+    //pre-allocate memory for output (the size of it never changes)
+    for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
+    {
+        const armnn::DataType dataType = m_outputBindingInfo[it].second.GetDataType();
+        const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();
+
+        InferenceResult oneLayerOutResult;
+        switch (dataType)
+        {
+            case armnn::DataType::Float32:
+            {
+                oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
+                break;
+            }
+            default:
+            {
+                errorMessage = "ArmnnNetworkExecutor: unsupported output tensor data type";
+                ARMNN_LOG(error) << errorMessage << " " << log_as_int(dataType);
+                throw armnn::Exception(errorMessage);
+            }
+        }
+
+        m_OutputBuffer.emplace_back(oneLayerOutResult);
+    }
+
+    // Make ArmNN output tensors, once all output buffers have been allocated.
+    m_OutputTensors.reserve(m_OutputBuffer.size());
+    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
+    {
+        m_OutputTensors.emplace_back(std::make_pair(
+                m_outputBindingInfo[it].first,
+                armnn::Tensor(m_outputBindingInfo[it].second,
+                              m_OutputBuffer.at(it).data())
+        ));
+    }
+}
+
+void ArmnnNetworkExecutor::PrepareTensors(const void* inputData, const size_t dataBytes)
+{
+    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
+    m_InputTensors.clear();
+    m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
+}
+
+bool ArmnnNetworkExecutor::Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults)
+{
+    /* Prepare tensors if they are not ready */
+    ARMNN_LOG(debug) << "Preparing tensors...";
+    this->PrepareTensors(inputData, dataBytes);
+    ARMNN_LOG(trace) << "Running inference...";
+
+    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);
+
+    std::stringstream inferenceFinished;
+    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";
+
+    ARMNN_LOG(trace) << inferenceFinished.str();
+
+    if (ret == armnn::Status::Failure)
+    {
+        ARMNN_LOG(error) << "Failed to perform inference.";
+    }
+
+    outResults.reserve(m_outputLayerNamesList.size());
+    outResults = m_OutputBuffer;
+
+    return (armnn::Status::Success == ret);
+}
+
+Size ArmnnNetworkExecutor::GetImageAspectRatio()
+{
+    const auto shape = m_inputBindingInfo.second.GetShape();
+    assert(shape.GetNumDimensions() == 4);
+    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
+    return Size(shape[nhwc.GetWidthIndex()],
+                shape[nhwc.GetHeightIndex()]);
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/BoundingBox.cpp b/samples/ObjectDetection/src/BoundingBox.cpp
new file mode 100644
index 0000000000..c52b0fe58a
--- /dev/null
+++ b/samples/ObjectDetection/src/BoundingBox.cpp
@@ -0,0 +1,116 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BoundingBox.hpp"
+#include <algorithm>
+namespace od
+{
+
+BoundingBox::BoundingBox() :
+        BoundingBox(0, 0, 0u, 0u) {}
+
+BoundingBox::BoundingBox(
+        int x,
+        int y,
+        unsigned int width,
+        unsigned int height) :
+        m_X(x),
+        m_Y(y),
+        m_Width(width),
+        m_Height(height) {}
+
+BoundingBox::BoundingBox(const BoundingBox& other) :
+        m_X(other.m_X),
+        m_Y(other.m_Y),
+        m_Width(other.m_Width),
+        m_Height(other.m_Height) {}
+
+int BoundingBox::GetX() const {
+    return m_X;
+}
+
+int BoundingBox::GetY() const {
+    return m_Y;
+}
+
+unsigned int BoundingBox::GetWidth() const {
+    return m_Width;
+}
+
+unsigned int BoundingBox::GetHeight() const {
+    return m_Height;
+}
+
+void BoundingBox::SetX(int x) {
+    m_X = x;
+}
+
+void BoundingBox::SetY(int y) {
+    m_Y = y;
+}
+
+void BoundingBox::SetWidth(unsigned int width) {
+    m_Width = width;
+}
+
+void BoundingBox::SetHeight(unsigned int height) {
+    m_Height = height;
+}
+
+BoundingBox& BoundingBox::operator=(const BoundingBox& other) {
+    m_X = other.m_X;
+    m_Y = other.m_Y;
+
+    m_Width = other.m_Width;
+    m_Height = other.m_Height;
+
+    return *this;
+}
+
+/* Helper function to get a "valid" bounding box */
+void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut,
+                         const BoundingBox& boxLimits) {
+    boxOut.SetX(std::max(boxIn.GetX(), boxLimits.GetX()));
+    boxOut.SetY(std::max(boxIn.GetY(), boxLimits.GetY()));
+
+    /* If we have changed x and/or y, we compensate by reducing the height and/or width */
+    int boxOutWidth = static_cast<int>(boxIn.GetWidth()) -
+                      std::max(0, (boxOut.GetX() - boxIn.GetX()));
+    int boxOutHeight = static_cast<int>(boxIn.GetHeight()) -
+                       std::max(0, (boxOut.GetY() - boxIn.GetY()));
+
+    /* This suggests that there was no overlap on x or/and y axis */
+    if (boxOutHeight <= 0 || boxOutWidth <= 0)
+    {
+        boxOut = BoundingBox{0, 0, 0, 0};
+        return;
+    }
+
+    const int limitBoxRightX = boxLimits.GetX() + static_cast<int>(boxLimits.GetWidth());
+    const int limitBoxRightY = boxLimits.GetY() + static_cast<int>(boxLimits.GetHeight());
+    const int boxRightX = boxOut.GetX() + boxOutWidth;
+    const int boxRightY = boxOut.GetY() + boxOutHeight;
+
+    if (boxRightX > limitBoxRightX)
+    {
+        boxOutWidth -= (boxRightX - limitBoxRightX);
+    }
+
+    if (boxRightY > limitBoxRightY)
+    {
+        boxOutHeight -= (boxRightY - limitBoxRightY);
+    }
+
+    /* This suggests value has rolled over because of very high numbers, not handled for now */
+    if (boxOutHeight <= 0 || boxOutWidth <= 0)
+    {
+        boxOut = BoundingBox{0, 0, 0, 0};
+        return;
+    }
+
+    boxOut.SetHeight(static_cast<unsigned int>(boxOutHeight));
+    boxOut.SetWidth(static_cast<unsigned int>(boxOutWidth));
+}
+}// namespace od
\ No newline at end of file
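A worked example of the clipping helper above, assuming it is declared in BoundingBox.hpp: a box starting left of and extending below a 100x100 limit box gets its origin clamped and its width and height reduced accordingly.

```cpp
#include "BoundingBox.hpp"

void ClipExample()
{
    od::BoundingBox box(-10, 20, 50u, 120u);   // starts left of, and extends below, the limits
    od::BoundingBox limits(0, 0, 100u, 100u);
    od::BoundingBox valid;

    od::GetValidBoundingBox(box, valid, limits);
    // valid is now {x:0, y:20, width:40, height:80}:
    // 10 pixels trimmed from the left, 40 trimmed from the bottom.
}
```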
+// SPDX-License-Identifier: MIT
+//
+
+#include "CmdArgsParser.hpp"
+#include <iostream>
+/*
+ * Checks that a particular option was specified by the user
+ */
+bool CheckOptionSpecified(const std::map<std::string, std::string>& options, const std::string& option)
+{
+    auto it = options.find(option);
+    return it != options.end();
+}
+
+/*
+ * Retrieves the user provided option
+ */
+std::string GetSpecifiedOption(const std::map<std::string, std::string>& options, const std::string& option)
+{
+    if (CheckOptionSpecified(options, option)){
+        return options.at(option);
+    }
+    else
+    {
+        throw std::invalid_argument("Required option: " + option + " not defined.");
+    }
+}
+
+/*
+ * Parses all the command line options provided by the user and stores them in a map.
+ */
+int ParseOptions(std::map<std::string, std::string>& options,
+                 std::map<std::string, std::string>& acceptedOptions,
+                 char *argv[], int argc)
+{
+    for (int i = 1; i < argc; ++i)
+    {
+        std::string currentOption = std::string(argv[i]);
+        auto it = acceptedOptions.find(currentOption);
+        if (it != acceptedOptions.end())
+        {
+            if (i + 1 < argc && std::string(argv[i + 1]).rfind("--", 0) != 0)
+            {
+                std::string value = argv[++i];
+                options.insert({it->first, value});
+            }
+            else if (std::string(argv[i]) == HELP)
+            {
+                std::cout << "Available options" << std::endl;
+                for (auto & acceptedOption : acceptedOptions)
+                {
+                    std::cout << acceptedOption.first << " : " << acceptedOption.second << std::endl;
+                }
+                return 2;
+            }
+            else
+            {
+                std::cerr << std::string(argv[i]) << " option requires one argument." << std::endl;
+                return 1;
+            }
+        }
+        else
+        {
+            std::cerr << "Unrecognised option: " << std::string(argv[i]) << std::endl;
+            return 1;
+        }
+    }
+    return 0;
+}
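A sketch of driving the parser above; the real option-name constants live in CmdArgsParser.hpp, so a hypothetical local option table is used here instead:

```cpp
#include "CmdArgsParser.hpp"
#include <iostream>
#include <map>
#include <string>

int DemoParse(int argc, char* argv[])
{
    // Hypothetical option table: flag -> help text.
    std::map<std::string, std::string> accepted{
        {"--video-file-path", "Path to the input video"},
        {"--model-file-path", "Path to the .tflite model"}
    };
    std::map<std::string, std::string> options;

    int rc = ParseOptions(options, accepted, argv, argc);  // 0 on success, 2 if help was printed
    if (rc == 0)
    {
        std::cout << GetSpecifiedOption(options, "--video-file-path") << std::endl;
    }
    return rc;
}
```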
diff --git a/samples/ObjectDetection/src/CvVideoFileWriter.cpp b/samples/ObjectDetection/src/CvVideoFileWriter.cpp
new file mode 100644
index 0000000000..ab80b95d49
--- /dev/null
+++ b/samples/ObjectDetection/src/CvVideoFileWriter.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvVideoFileWriter.hpp"
+
+namespace od
+{
+
+void CvVideoFileWriter::Init(const std::string& outputVideo, int encoding, double fps, int width, int height)
+{
+    m_ready = m_cvWriter.open(outputVideo, cv::CAP_FFMPEG,
+                              encoding,
+                              fps,
+                              cv::Size(width, height), true);
+}
+
+
+void CvVideoFileWriter::WriteFrame(std::shared_ptr<cv::Mat>& frame)
+{
+    if(m_cvWriter.isOpened())
+    {
+        cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR);
+        m_cvWriter.write(*frame);
+    }
+}
+
+bool CvVideoFileWriter::IsReady() const
+{
+    return m_ready;
+}
+
+void CvVideoFileWriter::Close()
+{
+    m_cvWriter.release();
+}
+}// namespace od
diff --git a/samples/ObjectDetection/src/CvVideoFrameReader.cpp b/samples/ObjectDetection/src/CvVideoFrameReader.cpp
new file mode 100644
index 0000000000..09b5050973
--- /dev/null
+++ b/samples/ObjectDetection/src/CvVideoFrameReader.cpp
@@ -0,0 +1,98 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "CvVideoFrameReader.hpp"
+
+namespace od
+{
+
+std::shared_ptr<cv::Mat> CvVideoFrameReader::ReadFrame()
+{
+    // opencv copies data anyway
+    cv::Mat captureFrame;
+    m_capture.read(captureFrame);
+    return std::make_shared<cv::Mat>(std::move(captureFrame));
+}
+
+bool CvVideoFrameReader::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const
+{
+    assert(frame != nullptr);
+    return frame->empty();
+}
+
+void CvVideoFrameReader::CheckIsOpen(const std::string& source)
+{
+    if (!m_capture.isOpened())
+    {
+        throw std::runtime_error("Failed to open video capture for the source = " + source);
+    }
+}
+
+void CvVideoFrameReader::Init(const std::string& source)
+{
+    m_capture.open(source);
+    CheckIsOpen(source);
+}
+
+int CvVideoFrameReader::GetSourceWidth() const
+{
+    return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_WIDTH)));
+}
+
+int CvVideoFrameReader::GetSourceHeight() const
+{
+    return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_HEIGHT)));
+}
+
+double CvVideoFrameReader::GetSourceFps() const
+{
+    return m_capture.get(cv::CAP_PROP_FPS);
+}
+
+bool CvVideoFrameReader::ConvertToRGB()
+{
+    m_capture.set(cv::CAP_PROP_CONVERT_RGB, 1.0);
+    return static_cast<bool>(m_capture.get(cv::CAP_PROP_CONVERT_RGB));
+}
+
+std::string CvVideoFrameReader::GetSourceEncoding() const
+{
+    char fourccStr[5];
+    auto fourcc = (int)m_capture.get(cv::CAP_PROP_FOURCC);
+    sprintf(fourccStr,"%c%c%c%c",fourcc & 0xFF, (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, (fourcc >> 24) & 0xFF);
+    return fourccStr;
+}
+
+int CvVideoFrameReader::GetSourceEncodingInt() const
+{
+    return (int)m_capture.get(cv::CAP_PROP_FOURCC);
+}
+
+int CvVideoFrameReader::GetFrameCount() const
+{
+    return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_COUNT)));
+}
+
+std::shared_ptr<cv::Mat> CvVideoFrameReaderRgbWrapper::ReadFrame()
+{
+    auto framePtr = m_reader->ReadFrame();
+    if (!IsExhausted(framePtr))
+    {
+        cv::cvtColor(*framePtr, *framePtr, cv::COLOR_BGR2RGB);
+    }
+    return framePtr;
+}
+
+bool CvVideoFrameReaderRgbWrapper::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const
+{
+    return m_reader->IsExhausted(frame);
+}
+
+CvVideoFrameReaderRgbWrapper::CvVideoFrameReaderRgbWrapper(std::unique_ptr<CvVideoFrameReader> reader):
+        m_reader(std::move(reader))
+{}
+
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/CvWindowOutput.cpp b/samples/ObjectDetection/src/CvWindowOutput.cpp
new file mode 100644
index 0000000000..a32147b19a
--- /dev/null
+++ b/samples/ObjectDetection/src/CvWindowOutput.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvWindowOutput.hpp"
+
+namespace od
+{
+
+void CvWindowOutput::Init(const std::string& windowName)
+{
+    m_windowName = windowName;
+    cv::namedWindow(m_windowName, cv::WINDOW_AUTOSIZE);
+}
+
+void CvWindowOutput::WriteFrame(std::shared_ptr<cv::Mat>& frame)
+{
+    cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR);
+    cv::imshow(m_windowName, *frame);
+    cv::waitKey(30);
+}
+
+void CvWindowOutput::Close()
+{
+    cv::destroyWindow(m_windowName);
+}
+
+bool CvWindowOutput::IsReady() const
+{
+    return true;
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/DetectedObject.cpp b/samples/ObjectDetection/src/DetectedObject.cpp
new file mode 100644
index 0000000000..95f99a07d6
--- /dev/null
+++ b/samples/ObjectDetection/src/DetectedObject.cpp
@@ -0,0 +1,65 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DetectedObject.hpp"
+
+namespace od
+{
+
+DetectedObject::DetectedObject() :
+        DetectedObject(0u, "", BoundingBox(), 0.0f)
+{}
+
+DetectedObject::DetectedObject(
+        unsigned int id,
+        std::string label,
+        const BoundingBox &boundingBox,
+        float score) :
+        m_Id(id),
+        m_Label(std::move(label)),
+        m_BoundingBox(boundingBox),
+        m_Score(score)
+{}
+
+unsigned int DetectedObject::GetId() const
+{
+    return m_Id;
+}
+
+const std::string &DetectedObject::GetLabel() const
+{
+    return m_Label;
+}
+
+const BoundingBox &DetectedObject::GetBoundingBox() const
+{
+    return m_BoundingBox;
+}
+
+float DetectedObject::GetScore() const
+{
+    return m_Score;
+}
+
+void DetectedObject::SetId(unsigned int id)
+{
+    m_Id = id;
+}
+
+void DetectedObject::SetLabel(const std::string &label)
+{
+    m_Label = label;
+}
+
+void DetectedObject::SetBoundingBox(const BoundingBox &boundingBox)
+{
+    m_BoundingBox = boundingBox;
+}
+
+void DetectedObject::SetScore(float score)
+{
+    m_Score = score;
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/ImageUtils.cpp b/samples/ObjectDetection/src/ImageUtils.cpp
new file mode 100644
index 0000000000..9a3ed17b63
--- /dev/null
+++ b/samples/ObjectDetection/src/ImageUtils.cpp
@@ -0,0 +1,126 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ImageUtils.hpp"
+#include "BoundingBox.hpp"
+#include "Types.hpp"
+
+#include <armnn/Logging.hpp>
+
+static cv::Scalar GetScalarColorCode(std::tuple<int, int, int> color)
+{
+    return cv::Scalar(std::get<0>(color), std::get<1>(color), std::get<2>(color));
+}
+
+void AddInferenceOutputToFrame(od::DetectedObjects& decodedResults, cv::Mat& inputFrame,
+                               std::vector<std::tuple<std::string, od::BBoxColor>>& labels)
+{
+    for(const od::DetectedObject& object : decodedResults)
+    {
+        int confidence = static_cast<int>(object.GetScore() * 100);
+        int baseline = 0;
+        std::string textStr;
+        std::tuple<int, int, int> colorCode(255, 0, 0); //red
+
+        if (labels.size() > object.GetId())
+        {
+            auto label = labels[object.GetId()];
+            textStr = std::get<0>(label) + " - " + std::to_string(confidence) + "%";
+            colorCode = std::get<1>(label).colorCode;
+        }
+        else
+        {
+            textStr = std::to_string(object.GetId()) + " - " + std::to_string(confidence) + "%";
+        }
+
+        cv::Size textSize = cv::getTextSize(textStr, cv::FONT_HERSHEY_DUPLEX, 1.0, 1, &baseline);
+
+        const od::BoundingBox& bbox = object.GetBoundingBox();
+
+        if (bbox.GetX() + bbox.GetWidth() > inputFrame.cols)
+        {
+            cv::Rect r(bbox.GetX(), bbox.GetY(), inputFrame.cols - bbox.GetX(), bbox.GetHeight());
+
+            cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+        }
+        else if (bbox.GetY() + bbox.GetHeight() > inputFrame.rows)
+        {
+            cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), inputFrame.rows - bbox.GetY());
+
+            cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+        }
+        else
+        {
+            cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), bbox.GetHeight());
+
+            cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+        }
+
+        int textBoxY = std::max(0, bbox.GetY() - textSize.height);
+
+        cv::Rect text(bbox.GetX(), textBoxY, textSize.width, textSize.height);
+
+        cv::rectangle(inputFrame, text, GetScalarColorCode(colorCode), -1);
+
+        cv::Scalar color;
+
+        if(std::get<0>(colorCode) + std::get<1>(colorCode) + std::get<2>(colorCode) > 127)
+        {
+            color = cv::Scalar::all(0);
+        }
+        else
+        {
+            color = cv::Scalar::all(255);
+        }
+
+        cv::putText(inputFrame,
+                    textStr,
+                    cv::Point(bbox.GetX(), textBoxY + textSize.height - (textSize.height)/3),
+                    cv::FONT_HERSHEY_DUPLEX,
+                    0.5,
+                    color,
+                    1);
+    }
+}
+
+
+void ResizeFrame(const cv::Mat& frame, cv::Mat& dest, const od::Size& aspectRatio)
+{
+    if(&dest != &frame)
+    {
+        double longEdgeInput = std::max(frame.rows, frame.cols);
+        double longEdgeOutput = std::max(aspectRatio.m_Width, aspectRatio.m_Height);
+        const double resizeFactor = longEdgeOutput/longEdgeInput;
+        cv::resize(frame, dest, cv::Size(0, 0), resizeFactor, resizeFactor, DefaultResizeFlag);
+    }
+    else
+    {
+        const std::string warningMessage{"Resize was not performed because resized frame references the source frame."};
+        ARMNN_LOG(warning) << warningMessage;
+    }
+}
+
+/** Pad a frame with zeros (add rows and columns to the end) */
+void PadFrame(const cv::Mat& src, cv::Mat& dest, const int bottom, const int right)
+{
+    if(&dest != &src)
+    {
+        cv::copyMakeBorder(src, dest, 0, bottom, 0, right, cv::BORDER_CONSTANT);
+    }
+    else
+    {
+        const std::string warningMessage
+        {
+            "Pad was not performed because destination frame references the source frame."
+        };
+        ARMNN_LOG(warning) << warningMessage;
+    }
+}
+
+void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize)
+{
+    ResizeFrame(frame, cache, destSize);
+    PadFrame(cache, dest, destSize.m_Height - cache.rows, destSize.m_Width - cache.cols);
+}
diff --git a/samples/ObjectDetection/src/Main.cpp b/samples/ObjectDetection/src/Main.cpp
new file mode 100644
index 0000000000..10abb65cce
--- /dev/null
+++ b/samples/ObjectDetection/src/Main.cpp
@@ -0,0 +1,160 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvVideoFrameReader.hpp"
+#include "CvWindowOutput.hpp"
+#include "CvVideoFileWriter.hpp"
+#include "NetworkPipeline.hpp"
+#include "CmdArgsParser.hpp"
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <random>
+
+/*
+ * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector
+ */
+std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends)
+{
+    std::vector<armnn::BackendId> backends;
+    std::stringstream ss(preferredBackends);
+
+    while(ss.good())
+    {
+        std::string backend;
+        std::getline(ss, backend, ',');
+        backends.emplace_back(backend);
+    }
+    return backends;
+}
+
+/*
+ * Assigns a color to each label in the label set
+ */
+std::vector<std::tuple<std::string, od::BBoxColor>> AssignColourToLabel(const std::string& pathToLabelFile)
+{
+    std::ifstream in(pathToLabelFile);
+    std::vector<std::tuple<std::string, od::BBoxColor>> labels;
+
+    std::string str;
+    std::default_random_engine generator;
+    std::uniform_int_distribution<int> distribution(0,255);
+
+    while (std::getline(in, str))
+    {
+        if(!str.empty())
+        {
+            od::BBoxColor c{
+                .colorCode = std::make_tuple(distribution(generator),
+                                             distribution(generator),
+                                             distribution(generator))
+            };
+            auto bboxInfo = std::make_tuple (str, c);
+
+            labels.emplace_back(bboxInfo);
+        }
+    }
+    return labels;
+}
+
+std::tuple<std::unique_ptr<od::IFrameReader<cv::Mat>>,
+           std::unique_ptr<od::IFrameOutput<cv::Mat>>>
+    GetFrameSourceAndSink(const std::map<std::string, std::string>& options) {
+
+    std::unique_ptr<od::IFrameReader<cv::Mat>> readerPtr;
+
+    std::unique_ptr<od::CvVideoFrameReader> reader = std::make_unique<od::CvVideoFrameReader>();
+    reader->Init(GetSpecifiedOption(options, VIDEO_FILE_PATH));
+
+    auto enc = reader->GetSourceEncodingInt();
+    auto fps = reader->GetSourceFps();
+    auto w = reader->GetSourceWidth();
+    auto h = reader->GetSourceHeight();
+    if (!reader->ConvertToRGB())
+    {
+        readerPtr = std::move(std::make_unique<od::CvVideoFrameReaderRgbWrapper>(std::move(reader)));
+    }
+    else
+    {
+        readerPtr = std::move(reader);
+    }
+
+    if(CheckOptionSpecified(options, OUTPUT_VIDEO_FILE_PATH))
+    {
+        std::string outputVideo = GetSpecifiedOption(options, OUTPUT_VIDEO_FILE_PATH);
+        auto writer = std::make_unique<od::CvVideoFileWriter>();
+        writer->Init(outputVideo, enc, fps, w, h);
+
+        return std::make_tuple<>(std::move(readerPtr), std::move(writer));
+    }
+    else
+    {
+        auto writer = std::make_unique<od::CvWindowOutput>();
+        writer->Init("Processed Video");
+        return std::make_tuple<>(std::move(readerPtr), std::move(writer));
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    std::map<std::string, std::string> options;
+
+    int result = ParseOptions(options, CMD_OPTIONS, argv, argc);
+    if (result != 0)
+    {
+        return result;
+    }
+
+    // Create the network options
+    od::ODPipelineOptions pipelineOptions;
+    pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);
+    pipelineOptions.m_ModelName = GetSpecifiedOption(options, MODEL_NAME);
+
+    if(CheckOptionSpecified(options, PREFERRED_BACKENDS))
+    {
+        pipelineOptions.m_backends = GetPreferredBackendList((GetSpecifiedOption(options, PREFERRED_BACKENDS)));
+    }
+    else
+    {
+        pipelineOptions.m_backends = {"CpuAcc", "CpuRef"};
+    }
+
+    auto labels = AssignColourToLabel(GetSpecifiedOption(options, LABEL_PATH));
+
+    od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(pipelineOptions);
+
+    auto inputAndOutput = GetFrameSourceAndSink(options);
+    std::unique_ptr<od::IFrameReader<cv::Mat>> reader = std::move(std::get<0>(inputAndOutput));
+    std::unique_ptr<od::IFrameOutput<cv::Mat>> sink = std::move(std::get<1>(inputAndOutput));
+
+    if (!sink->IsReady())
+    {
+        std::cerr << "Failed to open video writer.";
+        return 1;
+    }
+
+    od::InferenceResults results;
+
+    std::shared_ptr<cv::Mat> frame = reader->ReadFrame();
+
+    //pre-allocate frames
+    cv::Mat processed;
+
+    while(!reader->IsExhausted(frame))
+    {
+        objectDetectionPipeline->PreProcessing(*frame, processed);
+        objectDetectionPipeline->Inference(processed, results);
+        objectDetectionPipeline->PostProcessing(results,
+                                                [&frame, &labels](od::DetectedObjects detects) -> void {
+                                                    AddInferenceOutputToFrame(detects, *frame, labels);
+                                                });
+
+        sink->WriteFrame(frame);
+        frame = reader->ReadFrame();
+    }
+    sink->Close();
+    return 0;
+}
diff --git a/samples/ObjectDetection/src/NetworkPipeline.cpp b/samples/ObjectDetection/src/NetworkPipeline.cpp
new file mode 100644
index 0000000000..7f05882fc4
--- /dev/null
+++ b/samples/ObjectDetection/src/NetworkPipeline.cpp
@@ -0,0 +1,102 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NetworkPipeline.hpp"
+#include "ImageUtils.hpp"
+
+namespace od
+{
+
+ObjDetectionPipeline::ObjDetectionPipeline(std::unique_ptr<ArmnnNetworkExecutor> executor,
+                                           std::unique_ptr<IDetectionResultDecoder> decoder) :
+        m_executor(std::move(executor)),
+        m_decoder(std::move(decoder)){}
+
+void od::ObjDetectionPipeline::Inference(const cv::Mat& processed, InferenceResults& result)
+{
+    m_executor->Run(processed.data, processed.total() * processed.elemSize(), result);
+}
+
+void ObjDetectionPipeline::PostProcessing(InferenceResults& inferenceResult,
+                                          const std::function<void (DetectedObjects)>& callback)
+{
+    DetectedObjects detections = m_decoder->Decode(inferenceResult, m_inputImageSize,
+                                                   m_executor->GetImageAspectRatio(), {});
+    if (callback)
+    {
+        callback(detections);
+    }
+}
+
+void ObjDetectionPipeline::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+    m_inputImageSize.m_Height = frame.rows;
+    m_inputImageSize.m_Width = frame.cols;
+    ResizeWithPad(frame, processed, m_processedFrame, m_executor->GetImageAspectRatio());
+}
+
+MobileNetSSDv1::MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor,
+                               float objectThreshold) :
+        ObjDetectionPipeline(std::move(executor),
+                             std::make_unique<SSDResultDecoder>(objectThreshold))
+{}
+
+void MobileNetSSDv1::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+    ObjDetectionPipeline::PreProcessing(frame, processed);
+    if (m_executor->GetInputDataType() == armnn::DataType::Float32)
+    {
+        // [0, 255] => [-1.0, 1.0]
+        processed.convertTo(processed, CV_32FC3, 1 / 127.5, -1);
+    }
+}
+
+YoloV3Tiny::YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor,
+                       float NMSThreshold, float ClsThreshold, float ObjectThreshold) :
+        ObjDetectionPipeline(std::move(executor),
+                             std::move(std::make_unique<YoloResultDecoder>(NMSThreshold,
+                                                                           ClsThreshold,
+                                                                           ObjectThreshold)))
+{}
+
+void YoloV3Tiny::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+    ObjDetectionPipeline::PreProcessing(frame, processed);
+    if (m_executor->GetInputDataType() == armnn::DataType::Float32)
+    {
+        processed.convertTo(processed, CV_32FC3);
+    }
+}
+
+IPipelinePtr CreatePipeline(od::ODPipelineOptions& config)
+{
+    auto executor = std::make_unique<ArmnnNetworkExecutor>(config.m_ModelFilePath, config.m_backends);
+
+    if (config.m_ModelName == "SSD_MOBILE")
+    {
+        float detectionThreshold = 0.6f;
+
+        return std::make_unique<MobileNetSSDv1>(std::move(executor),
+                                                detectionThreshold
+        );
+    }
+    else if (config.m_ModelName == "YOLO_V3_TINY")
+    {
+        float NMSThreshold = 0.6f;
+        float ClsThreshold = 0.6f;
+        float ObjectThreshold = 0.6f;
+        return std::make_unique<YoloV3Tiny>(std::move(executor),
+                                            NMSThreshold,
+                                            ClsThreshold,
+                                            ObjectThreshold
+        );
+    }
+    else
+    {
+        throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " supplied by user.");
+    }
+
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/NonMaxSuppression.cpp b/samples/ObjectDetection/src/NonMaxSuppression.cpp
new file mode 100644
index 0000000000..7bcd9045a5
--- /dev/null
+++ b/samples/ObjectDetection/src/NonMaxSuppression.cpp
@@ -0,0 +1,92 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "NonMaxSuppression.hpp"
+
+#include <algorithm>
+
+namespace od
+{
+
+static std::vector<unsigned int> GenerateRangeK(unsigned int k)
+{
+    std::vector<unsigned int> range(k);
+    std::iota(range.begin(), range.end(), 0);
+    return range;
+}
+
+
+/**
+* @brief Returns the intersection over union for two bounding boxes
+*
+* @param[in] detect1 First detect containing bounding box.
+* @param[in] detect2 Second detect containing bounding box.
+* @return Calculated intersection over union.
+*
+*/
+static double IntersectionOverUnion(DetectedObject& detect1, DetectedObject& detect2)
+{
+    uint32_t area1 = (detect1.GetBoundingBox().GetHeight() * detect1.GetBoundingBox().GetWidth());
+    uint32_t area2 = (detect2.GetBoundingBox().GetHeight() * detect2.GetBoundingBox().GetWidth());
+
+    float yMinIntersection = std::max(detect1.GetBoundingBox().GetY(), detect2.GetBoundingBox().GetY());
+    float xMinIntersection = std::max(detect1.GetBoundingBox().GetX(), detect2.GetBoundingBox().GetX());
+
+    float yMaxIntersection = std::min(detect1.GetBoundingBox().GetY() + detect1.GetBoundingBox().GetHeight(),
+                                      detect2.GetBoundingBox().GetY() + detect2.GetBoundingBox().GetHeight());
+    float xMaxIntersection = std::min(detect1.GetBoundingBox().GetX() + detect1.GetBoundingBox().GetWidth(),
+                                      detect2.GetBoundingBox().GetX() + detect2.GetBoundingBox().GetWidth());
+
+    double areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
+                              std::max(xMaxIntersection - xMinIntersection, 0.0f);
+    double areaUnion = area1 + area2 - areaIntersection;
+
+    return areaIntersection / areaUnion;
+}
+
+std::vector<int> NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh)
+{
+    // Sort indices of detections by highest score to lowest.
+    std::vector<unsigned int> sortedIndicies = GenerateRangeK(inputDetections.size());
+    std::sort(sortedIndicies.begin(), sortedIndicies.end(),
+              [&inputDetections](int idx1, int idx2)
+              {
+                  return inputDetections[idx1].GetScore() > inputDetections[idx2].GetScore();
+              });
+
+    std::vector<bool> visited(inputDetections.size(), false);
+    std::vector<int> outputIndiciesAfterNMS;
+
+    for (int i = 0; i < inputDetections.size(); ++i)
+    {
+        // Each new unvisited detect should be kept.
+        if (!visited[sortedIndicies[i]])
+        {
+            outputIndiciesAfterNMS.emplace_back(sortedIndicies[i]);
+            visited[sortedIndicies[i]] = true;
+        }
+
+        // Look for detections to suppress.
+        for (int j = i + 1; j < inputDetections.size(); ++j)
+        {
+            if (!visited[sortedIndicies[j]])
+            {
+                // Only detections with the same label suppress each other.
+                if (inputDetections[sortedIndicies[i]].GetLabel() == inputDetections[sortedIndicies[j]].GetLabel())
+                {
+                    if (IntersectionOverUnion(inputDetections[sortedIndicies[i]],
+                                              inputDetections[sortedIndicies[j]]) > iouThresh)
+                    {
+                        visited[sortedIndicies[j]] = true;
+                    }
+                }
+            }
+        }
+    }
+    return outputIndiciesAfterNMS;
+}
+
+} // namespace od
diff --git a/samples/ObjectDetection/src/SSDResultDecoder.cpp b/samples/ObjectDetection/src/SSDResultDecoder.cpp
new file mode 100644
index 0000000000..a3319212e5
--- /dev/null
+++ b/samples/ObjectDetection/src/SSDResultDecoder.cpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SSDResultDecoder.hpp"
+
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <stdexcept>
+namespace od
+{
+
+DetectedObjects SSDResultDecoder::Decode(const InferenceResults& networkResults,
+                                         const Size& outputFrameSize,
+                                         const Size& resizedFrameSize,
+                                         const std::vector<std::string>& labels)
+{
+    // SSD network outputs 4 tensors: bounding boxes, labels, probabilities, number of detections.
+    if (networkResults.size() != 4)
+    {
+        throw std::runtime_error("Number of outputs from SSD model doesn't equal 4");
+    }
+
+    DetectedObjects detectedObjects;
+    const int numDetections = static_cast<int>(std::lround(networkResults[3][0]));
+
+    double longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height);
+    double longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height);
+    const double resizeFactor = longEdgeOutput/longEdgeInput;
+
+    for (int i = 0; i < numDetections; ++i)
+    {
+        if (networkResults[2][i] > m_objectThreshold)
+        {
+            DetectedObject detectedObject;
+            detectedObject.SetScore(networkResults[2][i]);
+            auto classId = std::lround(networkResults[1][i]);
+
+            if (classId < labels.size())
+            {
+                detectedObject.SetLabel(labels[classId]);
+            }
+            else
+            {
+                detectedObject.SetLabel(std::to_string(classId));
+            }
+            detectedObject.SetId(classId);
+
+            // Convert SSD bbox outputs (ratios of image size) to pixel values.
+            double topLeftY = networkResults[0][i*4 + 0] * resizedFrameSize.m_Height;
+            double topLeftX = networkResults[0][i*4 + 1] * resizedFrameSize.m_Width;
+            double botRightY = networkResults[0][i*4 + 2] * resizedFrameSize.m_Height;
+            double botRightX = networkResults[0][i*4 + 3] * resizedFrameSize.m_Width;
+
+            // Scale the coordinates to output frame size.
+            topLeftY *= resizeFactor;
+            topLeftX *= resizeFactor;
+            botRightY *= resizeFactor;
+            botRightX *= resizeFactor;
+
+            assert(botRightX > topLeftX);
+            assert(botRightY > topLeftY);
+
+            // Internal BoundingBox stores box top left x,y and width, height.
+            detectedObject.SetBoundingBox({static_cast<int>(std::round(topLeftX)),
+                                           static_cast<int>(std::round(topLeftY)),
+                                           static_cast<unsigned int>(botRightX - topLeftX),
+                                           static_cast<unsigned int>(botRightY - topLeftY)});
+
+            detectedObjects.emplace_back(detectedObject);
+        }
+    }
+    return detectedObjects;
+}
+
+SSDResultDecoder::SSDResultDecoder(float ObjectThreshold) : m_objectThreshold(ObjectThreshold) {}
+
+}// namespace od
\ No newline at end of file
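To make the scaling above concrete: with a 300x300 network input and a 1280-wide output frame, `resizeFactor` is 1280/300 ≈ 4.27, so a normalised box coordinate is first mapped into the 300x300 space and then stretched onto the frame. A sketch of the same arithmetic with illustrative numbers:

```cpp
// Illustrative numbers for the SSD coordinate mapping above.
void SsdBoxMathExample()
{
    const double resized = 300.0;                   // long edge of the network input
    const double output  = 1280.0;                  // long edge of the displayed frame
    const double resizeFactor = output / resized;   // ~4.27

    const double topLeftXRatio  = 0.25;             // raw network output in [0, 1]
    const double topLeftXPixels = topLeftXRatio * resized * resizeFactor;  // 0.25*300*4.27 ~= 320
}
```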
+// SPDX-License-Identifier: MIT
+//
+
+#include "YoloResultDecoder.hpp"
+
+#include "NonMaxSuppression.hpp"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace od
+{
+
+DetectedObjects YoloResultDecoder::Decode(const InferenceResults& networkResults,
+                                          const Size& outputFrameSize,
+                                          const Size& resizedFrameSize,
+                                          const std::vector<std::string>& labels)
+{
+
+    // Yolo v3 network outputs 1 tensor
+    if (networkResults.size() != 1)
+    {
+        throw std::runtime_error("Number of outputs from Yolo model doesn't equal 1");
+    }
+    auto element_step = m_boxElements + m_confidenceElements + m_numClasses;
+
+    float longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height);
+    float longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height);
+    const float resizeFactor = longEdgeOutput/longEdgeInput;
+
+    DetectedObjects detectedObjects;
+    DetectedObjects resultsAfterNMS;
+
+    for (const InferenceResult& result : networkResults)
+    {
+        for (unsigned int i = 0; i < m_numBoxes; ++i)
+        {
+            const float* cur_box = &result[i * element_step];
+            // Objectness score
+            if (cur_box[4] > m_objectThreshold)
+            {
+                for (unsigned int classIndex = 0; classIndex < m_numClasses; ++classIndex)
+                {
+                    const float class_prob = cur_box[4] * cur_box[5 + classIndex];
+
+                    // class confidence
+
+                    if (class_prob > m_ClsThreshold)
+                    {
+                        DetectedObject detectedObject;
+
+                        detectedObject.SetScore(class_prob);
+
+                        float topLeftX = cur_box[0] * resizeFactor;
+                        float topLeftY = cur_box[1] * resizeFactor;
+                        float botRightX = cur_box[2] * resizeFactor;
+                        float botRightY = cur_box[3] * resizeFactor;
+
+                        assert(botRightX > topLeftX);
+                        assert(botRightY > topLeftY);
+
+                        detectedObject.SetBoundingBox({static_cast<int>(topLeftX),
+                                                       static_cast<int>(topLeftY),
+                                                       static_cast<unsigned int>(botRightX-topLeftX),
+                                                       static_cast<unsigned int>(botRightY-topLeftY)});
+                        if(labels.size() > classIndex)
+                        {
+                            detectedObject.SetLabel(labels.at(classIndex));
+                        }
+                        else
+                        {
+                            detectedObject.SetLabel(std::to_string(classIndex));
+                        }
+                        detectedObject.SetId(classIndex);
+                        detectedObjects.emplace_back(detectedObject);
+                    }
+                }
+            }
+        }
+
+        std::vector<int> keepIndiciesAfterNMS = od::NonMaxSuppression(detectedObjects, m_NmsThreshold);
+
+        for (const int ind: keepIndiciesAfterNMS)
+        {
+            resultsAfterNMS.emplace_back(detectedObjects[ind]);
+        }
+    }
+
+    return resultsAfterNMS;
+}
+
+YoloResultDecoder::YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold)
+        : m_NmsThreshold(NMSThreshold), m_ClsThreshold(ClsThreshold), m_objectThreshold(ObjectThreshold) {}
+
+}// namespace od
+
+
+
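The decoder above assumes the flattened YOLO v3 tiny output layout: 2535 boxes (3 anchors x (13x13 + 26x26) grid cells for a 416x416 input), each holding 4 box elements, 1 objectness score and 80 class scores, i.e. a stride of 85 floats per box. A sketch of indexing that layout (the function name is hypothetical):

```cpp
#include <vector>

// Illustrative indexing into the flattened YOLO v3 tiny output tensor.
void YoloLayoutExample(const std::vector<float>& output)
{
    const unsigned int elementStep = 4U + 1U + 80U;   // box + objectness + classes = 85
    const unsigned int numBoxes    = 2535U;           // 3 * (13*13 + 26*26)

    for (unsigned int i = 0; i < numBoxes; ++i)
    {
        const float* box = &output[i * elementStep];
        const float objectness = box[4];              // score that any object is present
        const float firstClass = box[4] * box[5];     // confidence for class 0
    }
}
```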
diff --git a/samples/ObjectDetection/test/BoundingBoxTests.cpp b/samples/ObjectDetection/test/BoundingBoxTests.cpp
new file mode 100644
index 0000000000..a8ed29a977
--- /dev/null
+++ b/samples/ObjectDetection/test/BoundingBoxTests.cpp
@@ -0,0 +1,177 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <catch.hpp>
+#include "BoundingBox.hpp"
+
+namespace
+{
+    static constexpr unsigned int s_X = 100u;
+    static constexpr unsigned int s_Y = 200u;
+    static constexpr unsigned int s_W = 300u;
+    static constexpr unsigned int s_H = 400u;
+} // anonymous namespace
+
+TEST_CASE("BoundingBoxTest_Default")
+{
+    od::BoundingBox boundingBox;
+
+    REQUIRE(boundingBox.GetX() == 0u);
+    REQUIRE(boundingBox.GetY() == 0u);
+    REQUIRE(boundingBox.GetWidth() == 0u);
+    REQUIRE(boundingBox.GetHeight() == 0u);
+}
+
+TEST_CASE("BoundingBoxTest_Custom")
+{
+    od::BoundingBox boundingBox(s_X, s_Y, s_W, s_H);
+
+    REQUIRE(boundingBox.GetX() == s_X);
+    REQUIRE(boundingBox.GetY() == s_Y);
+    REQUIRE(boundingBox.GetWidth() == s_W);
+    REQUIRE(boundingBox.GetHeight() == s_H);
+}
+
+TEST_CASE("BoundingBoxTest_Setters")
+{
+    od::BoundingBox boundingBox;
+
+    boundingBox.SetX(s_X);
+    boundingBox.SetY(s_Y);
+    boundingBox.SetWidth(s_W);
+    boundingBox.SetHeight(s_H);
+
+    REQUIRE(boundingBox.GetX() == s_X);
+    REQUIRE(boundingBox.GetY() == s_Y);
+    REQUIRE(boundingBox.GetWidth() == s_W);
+    REQUIRE(boundingBox.GetHeight() == s_H);
+}
+
+static inline bool AreBoxesEqual(od::BoundingBox& b1, od::BoundingBox& b2)
+{
+    return (b1.GetX() == b2.GetX() && b1.GetY() == b2.GetY() &&
+            b1.GetWidth() == b2.GetWidth() && b1.GetHeight() == b2.GetHeight());
+}
+
+TEST_CASE("BoundingBoxTest_GetValidBoundingBox")
+{
+    od::BoundingBox boxIn { 0, 0, 10, 20 };
+    od::BoundingBox boxOut;
+
+    WHEN("Limiting box is completely within the input box")
+    {
+        od::BoundingBox boxLmt{ 1, 1, 9, 18 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxLmt,boxOut));
+    }
+
+    WHEN("Limiting box cuts off the top and left")
+    {
+        od::BoundingBox boxLmt{ 1, 1, 10, 20 };
+        od::BoundingBox boxExp{ 1, 1, 9, 19 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box cuts off the bottom")
+    {
+        od::BoundingBox boxLmt{ 0, 0, 10, 19 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxLmt, boxOut));
+    }
+
+    WHEN("Limiting box cuts off the right")
+    {
+        od::BoundingBox boxLmt{ 0, 0, 9, 20 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxLmt, boxOut));
+    }
+
+    WHEN("Limiting box cuts off the bottom and right")
+    {
+        od::BoundingBox boxLmt{ 0, 0, 9, 19 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxLmt, boxOut));
+    }
+
+    WHEN("Limiting box cuts off the bottom and left")
+    {
+        od::BoundingBox boxLmt{ 1, 0, 10, 19 };
+        od::BoundingBox boxExp{ 1, 0, 9, 19 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box does not impose any limit")
+    {
+        od::BoundingBox boxLmt{ 0, 0, 10, 20 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxIn, boxOut));
+    }
+
+    WHEN("Limiting box zeros out the width")
+    {
+        od::BoundingBox boxLmt{ 0, 0, 0, 20 };
+        od::BoundingBox boxExp{ 0, 0, 0, 0 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box zeros out the height")
+    {
+        od::BoundingBox boxLmt{ 0, 0, 10, 0 };
+        od::BoundingBox boxExp{ 0, 0, 0, 0 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box with negative starts - top and left with 1 sq pixel cut-off")
+    {
+        od::BoundingBox boxLmt{ -1, -1, 10, 20 };
+        od::BoundingBox boxExp{ 0, 0, 9, 19 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box with negative starts - top and left with full overlap")
+    {
+        od::BoundingBox boxLmt{ -1, -1, 11, 21 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxIn, boxOut));
+    }
+
+    WHEN("Limiting box with zero overlap")
+    {
+        od::BoundingBox boxLmt{-10,-20, 10, 20 };
+        od::BoundingBox boxExp{ 0, 0, 0, 0 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box with one square pixel overlap")
+    {
+        od::BoundingBox boxLmt{-9,-19, 10, 20 };
+        od::BoundingBox boxExp{ 0, 0, 1, 1 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    WHEN("Limiting box with unrealistically high values in positive quadrant")
+    {
+        od::BoundingBox boxLmt{INT32_MAX, INT32_MAX, UINT32_MAX, UINT32_MAX };
+        od::BoundingBox boxExp{ 0, 0, 0, 0 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+
+    /* This should actually return a valid bounding box, currently not handled. */
+    WHEN("Limiting box with unrealistic values spanning 32 bit space")
+    {
+        od::BoundingBox boxLmt{-(INT32_MAX), -(INT32_MAX), UINT32_MAX, UINT32_MAX};
+        od::BoundingBox boxExp{ 0, 0, 0, 0 };
+        GetValidBoundingBox(boxIn, boxOut, boxLmt);
+        REQUIRE(AreBoxesEqual(boxExp, boxOut));
+    }
+}
\ No newline at end of file
diff --git a/samples/ObjectDetection/test/FrameReaderTest.cpp b/samples/ObjectDetection/test/FrameReaderTest.cpp
new file mode 100644
index 0000000000..a4bda227b3
--- /dev/null
+++ b/samples/ObjectDetection/test/FrameReaderTest.cpp
@@ -0,0 +1,103 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#define CATCH_CONFIG_MAIN
+
+#include <catch.hpp>
+#include <opencv2/opencv.hpp>
+
+#include "IFrameReader.hpp"
+#include "CvVideoFrameReader.hpp"
+
+SCENARIO("Read frames from video file using CV frame reader", "[framereader]") {
+
+    GIVEN("a valid video file") {
+
+        std::string testResources = TEST_RESOURCE_DIR;
+        REQUIRE(testResources != "");
+        std::string file = testResources + "/" + "Megamind.avi";
+        WHEN("Frame reader is initialised") {
+
+            od::CvVideoFrameReader reader;
+            THEN("no exception is thrown") {
+                reader.Init(file);
+
+                AND_WHEN("when source parameters are read") {
+
+                    auto fps = reader.GetSourceFps();
+                    auto height = reader.GetSourceHeight();
+                    auto width = reader.GetSourceWidth();
+                    auto encoding = reader.GetSourceEncoding();
+                    auto framesCount = reader.GetFrameCount();
+
+                    THEN("they are aligned with video file") {
+
+                        REQUIRE(height == 528);
+                        REQUIRE(width == 720);
+                        REQUIRE(encoding == "XVID");
+                        REQUIRE(fps == 23.976);
+                        REQUIRE(framesCount == 270);
+                    }
+
+                }
+
+                AND_WHEN("frame is read") {
+                    auto framePtr = reader.ReadFrame();
+
+                    THEN("it is not a NULL pointer") {
+                        REQUIRE(framePtr != nullptr);
+                    }
+
+                    AND_THEN("it is not empty") {
+                        REQUIRE(!framePtr->empty());
+                        REQUIRE(!reader.IsExhausted(framePtr));
+                    }
+                }
+
+                AND_WHEN("all frames were read from the file") {
+
+                    for (int i = 0; i < 270; i++) {
+                        auto framePtr = reader.ReadFrame();
+                    }
+
+                    THEN("last + 1 frame is empty") {
+                        auto framePtr = reader.ReadFrame();
+
+                        REQUIRE(framePtr->empty());
+                        REQUIRE(reader.IsExhausted(framePtr));
+                    }
+
+                }
+
+                AND_WHEN("frames are read from the file, pointers point to the different objects") {
+
+                    auto framePtr = reader.ReadFrame();
+
+                    cv::Mat *frame = framePtr.get();
+
+                    for (int i = 0; i < 30; i++) {
+                        REQUIRE(frame != reader.ReadFrame().get());
+                    }
+
+                }
+            }
+        }
+    }
+
GIVEN("an invalid video file") { + + std::string file = "nosuchfile.avi"; + + WHEN("Frame reader is initialised") { + + od::CvVideoFrameReader reader; + + THEN("exception is thrown") { + REQUIRE_THROWS(reader.Init(file)); + } + } + + } +} \ No newline at end of file diff --git a/samples/ObjectDetection/test/ImageUtilsTest.cpp b/samples/ObjectDetection/test/ImageUtilsTest.cpp new file mode 100644 index 0000000000..e486ae192b --- /dev/null +++ b/samples/ObjectDetection/test/ImageUtilsTest.cpp @@ -0,0 +1,128 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include +#include +#include "ImageUtils.hpp" +#include "Types.hpp" + +std::vector> GetBoundingBoxPoints(std::vector& decodedResults, + cv::Mat imageMat) +{ + std::vector> bboxes; + for(const od::DetectedObject& object : decodedResults) + { + const od::BoundingBox& bbox = object.GetBoundingBox(); + + if (bbox.GetX() + bbox.GetWidth() > imageMat.cols) + { + for (int y = bbox.GetY(); y < bbox.GetY() + bbox.GetHeight(); ++y) + { + bboxes.emplace_back(std::tuple{bbox.GetX(), y}); + } + + for (int x = bbox.GetX(); x < imageMat.cols; ++x) + { + bboxes.emplace_back(std::tuple{x, bbox.GetY() + bbox.GetHeight() - 1}); + } + + for (int y = bbox.GetY(); y < bbox.GetY() + bbox.GetHeight(); ++y) + { + bboxes.emplace_back(std::tuple{imageMat.cols - 1, y}); + } + } + else if (bbox.GetY() + bbox.GetHeight() > imageMat.rows) + { + for (int y = bbox.GetY(); y < imageMat.rows; ++y) + { + bboxes.emplace_back(std::tuple{bbox.GetX(), y}); + } + + for (int x = bbox.GetX(); x < bbox.GetX() + bbox.GetWidth(); ++x) + { + bboxes.emplace_back(std::tuple{x, imageMat.rows - 1}); + } + + for (int y = bbox.GetY(); y < imageMat.rows; ++y) + { + bboxes.emplace_back(std::tuple{bbox.GetX() + bbox.GetWidth() - 1, y}); + } + } + else + { + for (int y = bbox.GetY(); y < bbox.GetY() + bbox.GetHeight(); ++y) + { + bboxes.emplace_back(std::tuple{bbox.GetX(), y}); + } + + for (int x = bbox.GetX(); x < bbox.GetX() + bbox.GetWidth(); ++x) + { + bboxes.emplace_back(std::tuple{x, bbox.GetY() + bbox.GetHeight() - 1}); + } + + for (int y = bbox.GetY(); y < bbox.GetY() + bbox.GetHeight(); ++y) + { + bboxes.emplace_back(std::tuple{bbox.GetX() + bbox.GetWidth() - 1, y}); + } + } + } + return bboxes; +} + +static std::string GetResourceFilePath(std::string filename) +{ + std::string testResources = TEST_RESOURCE_DIR; + if (0 == testResources.size()) + { + throw "Invalid test resources directory provided"; + } + else + { + if(testResources.back() != '/') + { + return testResources + "/" + filename; + } + else + { + return testResources + filename; + } + } +} + +TEST_CASE("Test Adding Inference output to frame") +{ + //todo: re-write test to use static detections + + std::string testResources = TEST_RESOURCE_DIR; + REQUIRE(testResources != ""); + std::vector> labels; + + od::BBoxColor c + { + .colorCode = std::make_tuple (0, 0, 255) + }; + + auto bboxInfo = std::make_tuple ("person", c); + od::BoundingBox bbox(10, 10, 50, 50); + od::DetectedObject detection(0, "person", bbox, 0.75); + + labels.push_back(bboxInfo); + + od::DetectedObjects detections; + cv::Mat frame = cv::imread(GetResourceFilePath("basketball1.png"), cv::IMREAD_COLOR); + detections.push_back(detection); + + AddInferenceOutputToFrame(detections, frame, labels); + + std::vector> bboxes = GetBoundingBoxPoints(detections, frame); + + // Check that every point is the expected color + for(std::tuple tuple : bboxes) + { + cv::Point p(std::get<0>(tuple), 
std::get<1>(tuple)); + CHECK(static_cast(frame.at(p)[0]) == 0); + CHECK(static_cast(frame.at(p)[1]) == 0); + CHECK(static_cast(frame.at(p)[2]) == 255); + } +} diff --git a/samples/ObjectDetection/test/NMSTests.cpp b/samples/ObjectDetection/test/NMSTests.cpp new file mode 100644 index 0000000000..d8b7c11ae1 --- /dev/null +++ b/samples/ObjectDetection/test/NMSTests.cpp @@ -0,0 +1,90 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include + +#include "NonMaxSuppression.hpp" + +TEST_CASE("Non_Max_Suppression_1") +{ + // Box with iou exactly 0.5. + od::DetectedObject detectedObject1; + detectedObject1.SetLabel("2"); + detectedObject1.SetScore(171); + detectedObject1.SetBoundingBox({0, 0, 150, 150}); + + // Strongest detection. + od::DetectedObject detectedObject2; + detectedObject2.SetLabel("2"); + detectedObject2.SetScore(230); + detectedObject2.SetBoundingBox({0, 75, 150, 75}); + + // Weaker detection with same coordinates of strongest. + od::DetectedObject detectedObject3; + detectedObject3.SetLabel("2"); + detectedObject3.SetScore(20); + detectedObject3.SetBoundingBox({0, 75, 150, 75}); + + // Detection not overlapping strongest. + od::DetectedObject detectedObject4; + detectedObject4.SetLabel("2"); + detectedObject4.SetScore(222); + detectedObject4.SetBoundingBox({0, 0, 50, 50}); + + // Small detection inside strongest. + od::DetectedObject detectedObject5; + detectedObject5.SetLabel("2"); + detectedObject5.SetScore(201); + detectedObject5.SetBoundingBox({100, 100, 20, 20}); + + // Box with iou exactly 0.5 but different label. + od::DetectedObject detectedObject6; + detectedObject6.SetLabel("1"); + detectedObject6.SetScore(75); + detectedObject6.SetBoundingBox({0, 0, 150, 150}); + + od::DetectedObjects expectedResults {detectedObject1, + detectedObject2, + detectedObject3, + detectedObject4, + detectedObject5, + detectedObject6}; + + auto sorted = od::NonMaxSuppression(expectedResults, 0.49); + + // 1st and 3rd detection should be suppressed. + REQUIRE(sorted.size() == 4); + + // Final detects should be ordered strongest to weakest. + REQUIRE(sorted[0] == 1); + REQUIRE(sorted[1] == 3); + REQUIRE(sorted[2] == 4); + REQUIRE(sorted[3] == 5); +} + +TEST_CASE("Non_Max_Suppression_2") +{ + // Real box examples. + od::DetectedObject detectedObject1; + detectedObject1.SetLabel("2"); + detectedObject1.SetScore(220); + detectedObject1.SetBoundingBox({430, 158, 68, 68}); + + od::DetectedObject detectedObject2; + detectedObject2.SetLabel("2"); + detectedObject2.SetScore(171); + detectedObject2.SetBoundingBox({438, 158, 68, 68}); + + od::DetectedObjects expectedResults {detectedObject1, + detectedObject2}; + + auto sorted = od::NonMaxSuppression(expectedResults, 0.5); + + // 2nd detect should be suppressed. + REQUIRE(sorted.size() == 1); + + // First detect should be strongest and kept. + REQUIRE(sorted[0] == 0); +} diff --git a/samples/ObjectDetection/test/PipelineTest.cpp b/samples/ObjectDetection/test/PipelineTest.cpp new file mode 100644 index 0000000000..289f44f5e9 --- /dev/null +++ b/samples/ObjectDetection/test/PipelineTest.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+#include <catch.hpp>
+#include <opencv2/opencv.hpp>
+#include <NetworkPipeline.hpp>
+#include "Types.hpp"
+
+static std::string GetResourceFilePath(const std::string& filename)
+{
+    std::string testResources = TEST_RESOURCE_DIR;
+    if (0 == testResources.size())
+    {
+        throw "Invalid test resources directory provided";
+    }
+    else
+    {
+        if (testResources.back() != '/')
+        {
+            return testResources + "/" + filename;
+        }
+        else
+        {
+            return testResources + filename;
+        }
+    }
+}
+
+TEST_CASE("Test Network Execution SSD_MOBILE")
+{
+    std::string testResources = TEST_RESOURCE_DIR;
+    REQUIRE(testResources != "");
+    // Create the network options
+    od::ODPipelineOptions options;
+    options.m_ModelFilePath = GetResourceFilePath("detect.tflite");
+    options.m_ModelName = "SSD_MOBILE";
+    options.m_backends = {"CpuAcc", "CpuRef"};
+
+    od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(options);
+
+    od::InferenceResults results;
+    cv::Mat processed;
+    cv::Mat inputFrame = cv::imread(GetResourceFilePath("basketball1.png"), cv::IMREAD_COLOR);
+    cv::cvtColor(inputFrame, inputFrame, cv::COLOR_BGR2RGB);
+
+    objectDetectionPipeline->PreProcessing(inputFrame, processed);
+
+    // The SSD MobileNet input tensor expects a 300x300 RGB image.
+    CHECK(processed.type() == CV_8UC3);
+    CHECK(processed.cols == 300);
+    CHECK(processed.rows == 300);
+
+    objectDetectionPipeline->Inference(processed, results);
+    objectDetectionPipeline->PostProcessing(results,
+                                            [](od::DetectedObjects detects) -> void {
+                                                CHECK(detects.size() == 2);
+                                                CHECK(detects[0].GetLabel() == "0");
+                                            });
+}
-- 
cgit v1.2.1