From 919c14ef132986aa1514b2070ce6d19b5579a6ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89anna=20=C3=93=20Cath=C3=A1in?= Date: Mon, 14 Sep 2020 17:36:49 +0100 Subject: MLECO-929 Add Object Detection sample application using the public ArmNN C++ API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change-Id: I14aa1b4b726212cffbefd6687203f93f936fa872 Signed-off-by: Éanna Ó Catháin --- .../include/ArmnnNetworkExecutor.hpp | 80 +++++++++++ samples/ObjectDetection/include/BoundingBox.hpp | 108 +++++++++++++++ samples/ObjectDetection/include/CmdArgsParser.hpp | 50 +++++++ .../ObjectDetection/include/CvVideoFileWriter.hpp | 61 +++++++++ .../ObjectDetection/include/CvVideoFrameReader.hpp | 108 +++++++++++++++ samples/ObjectDetection/include/CvWindowOutput.hpp | 53 ++++++++ samples/ObjectDetection/include/DetectedObject.hpp | 96 +++++++++++++ .../include/IDetectionResultDecoder.hpp | 39 ++++++ samples/ObjectDetection/include/IFrameOutput.hpp | 48 +++++++ samples/ObjectDetection/include/IFrameReader.hpp | 45 +++++++ samples/ObjectDetection/include/ImageUtils.hpp | 58 ++++++++ .../ObjectDetection/include/NetworkPipeline.hpp | 148 +++++++++++++++++++++ .../ObjectDetection/include/NonMaxSuppression.hpp | 28 ++++ .../ObjectDetection/include/SSDResultDecoder.hpp | 32 +++++ samples/ObjectDetection/include/Types.hpp | 50 +++++++ .../ObjectDetection/include/YoloResultDecoder.hpp | 43 ++++++ 16 files changed, 1047 insertions(+) create mode 100644 samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp create mode 100644 samples/ObjectDetection/include/BoundingBox.hpp create mode 100644 samples/ObjectDetection/include/CmdArgsParser.hpp create mode 100644 samples/ObjectDetection/include/CvVideoFileWriter.hpp create mode 100644 samples/ObjectDetection/include/CvVideoFrameReader.hpp create mode 100644 samples/ObjectDetection/include/CvWindowOutput.hpp create mode 100644 samples/ObjectDetection/include/DetectedObject.hpp create mode 100644 samples/ObjectDetection/include/IDetectionResultDecoder.hpp create mode 100644 samples/ObjectDetection/include/IFrameOutput.hpp create mode 100644 samples/ObjectDetection/include/IFrameReader.hpp create mode 100644 samples/ObjectDetection/include/ImageUtils.hpp create mode 100644 samples/ObjectDetection/include/NetworkPipeline.hpp create mode 100644 samples/ObjectDetection/include/NonMaxSuppression.hpp create mode 100644 samples/ObjectDetection/include/SSDResultDecoder.hpp create mode 100644 samples/ObjectDetection/include/Types.hpp create mode 100644 samples/ObjectDetection/include/YoloResultDecoder.hpp (limited to 'samples/ObjectDetection/include') diff --git a/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp b/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp new file mode 100644 index 0000000000..c75b68bbe1 --- /dev/null +++ b/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Types.hpp" + +#include "armnn/ArmNN.hpp" +#include "armnnTfLiteParser/ITfLiteParser.hpp" +#include "armnnUtils/DataLayoutIndexed.hpp" +#include + +#include +#include + +namespace od +{ +/** +* @brief Used to load in a network through ArmNN and run inference on it against a given backend. 
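+*
+* A minimal usage sketch (illustrative only: the model path, the backend element
+* type and the frame variables below are placeholders, not part of this header):
+* @code
+* std::vector<armnn::BackendId> backends = {"CpuAcc", "CpuRef"};
+* std::string modelPath = "path/to/detection_model.tflite";
+* od::ArmnnNetworkExecutor executor(modelPath, backends);
+* od::InferenceResults results;
+* // frameData / frameBytes: pre-processed input matching the network input tensor
+* if (executor.Run(frameData, frameBytes, results))
+* {
+*     // results now holds one vector of floats per output layer
+* }
+* @endcode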
+* +*/ +class ArmnnNetworkExecutor +{ +private: + armnn::IRuntimePtr m_Runtime; + armnn::NetworkId m_NetId{}; + mutable InferenceResults m_OutputBuffer; + armnn::InputTensors m_InputTensors; + armnn::OutputTensors m_OutputTensors; + std::vector m_outputBindingInfo; + + std::vector m_outputLayerNamesList; + + armnnTfLiteParser::BindingPointInfo m_inputBindingInfo; + + void PrepareTensors(const void* inputData, const size_t dataBytes); + + template + auto log_as_int(Enumeration value) + -> typename std::underlying_type::type + { + return static_cast::type>(value); + } + +public: + ArmnnNetworkExecutor() = delete; + + /** + * @brief Initializes the network with the given input data. Parsed through TfLiteParser and optimized for a + * given backend. + * + * Note that the output layers names order in m_outputLayerNamesList affects the order of the feature vectors + * in output of the Run method. + * + * * @param[in] modelPath - Relative path to the model file + * * @param[in] backends - The list of preferred backends to run inference on + */ + ArmnnNetworkExecutor(std::string& modelPath, + std::vector& backends); + + /** + * @brief Returns the aspect ratio of the associated model in the order of width, height. + */ + Size GetImageAspectRatio(); + + armnn::DataType GetInputDataType() const; + + /** + * @brief Runs inference on the provided input data, and stores the results in the provided InferenceResults object. + * + * @param[in] inputData - input frame data + * @param[in] dataBytes - input data size in bytes + * @param[out] results - Vector of DetectionResult objects used to store the output result. + */ + bool Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults); + +}; +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/BoundingBox.hpp b/samples/ObjectDetection/include/BoundingBox.hpp new file mode 100644 index 0000000000..2b790401db --- /dev/null +++ b/samples/ObjectDetection/include/BoundingBox.hpp @@ -0,0 +1,108 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +namespace od +{ +/** +* @brief Class used to store and receive bounding box location and size information +* +*/ +class BoundingBox +{ +public: + /** + * @brief Default constructor + */ + BoundingBox(); + + /** + * @brief Constructor with parameters to configure the bounding box dimensions + * @param[in] x int value representing the x coordinate. + * @param[in] y int value representing the y coordinate. + * @param[in] width unsigned int value representing the width value. + * @param[in] height unsigned int value representing the height value. + */ + BoundingBox(int x, int y, unsigned int width, unsigned int height); + + /** + * @brief Constructor with a BoundingBox type parameter to copy from. + * @param[in] other Bounding box to copy. + */ + BoundingBox(const BoundingBox& other); + + ~BoundingBox() = default; + + /** + * @brief Function to retrieve the X coordinate. + */ + int GetX() const; + + /** + * @brief Function to retrieve the Y coordinate. + */ + int GetY() const; + + /** + * @brief Function to retrieve the width. + */ + unsigned int GetWidth() const; + + /** + * @brief Function to retrieve the height. + */ + unsigned int GetHeight() const; + + /** + * @brief Function to set the X coordinate. + * @param[in] x int value representing x coordinate + */ + void SetX(int x); + + /** + * @brief Function to set the Y coordinate. 
+ * @param[in] y int value representing y coordinate + */ + void SetY(int y); + + /** + * @brief Function to set the width of the BoundingBox. + * @param[in] width int value representing the width + */ + void SetWidth(unsigned int width); + + /** + * @brief Function to set the height of the BoundingBox. + * @param[in] height int value representing the height + */ + void SetHeight(unsigned int height); + + /** + * @brief Function to check equality with another BoundingBox + * @param[in] other BoundingBox to compare with + */ + BoundingBox& operator=(const BoundingBox& other); + +private: + int m_X; + int m_Y; + unsigned int m_Width; + unsigned int m_Height; +}; + +/* + * @brief: Get a bounding box within the limits of another bounding box + * + * @param[in] boxIn Input bounding box + * @param[out] boxOut Output bounding box + * @param[in] boxLimits Bounding box defining the limits which the output + * needs to conform to. + * @return none + */ +void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut, + const BoundingBox& boxLimits); + +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/CmdArgsParser.hpp b/samples/ObjectDetection/include/CmdArgsParser.hpp new file mode 100644 index 0000000000..6c22e6ff6d --- /dev/null +++ b/samples/ObjectDetection/include/CmdArgsParser.hpp @@ -0,0 +1,50 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once +#include +#include +#include + +const std::string MODEL_NAME = "--model-name"; +const std::string VIDEO_FILE_PATH = "--video-file-path"; +const std::string MODEL_FILE_PATH = "--model-file-path"; +const std::string OUTPUT_VIDEO_FILE_PATH = "--output-video-file-path"; +const std::string LABEL_PATH = "--label-path"; +const std::string PREFERRED_BACKENDS = "--preferred-backends"; +const std::string HELP = "--help"; + +/* + * The accepted options for this Object detection executable + */ +static std::map CMD_OPTIONS = { + {VIDEO_FILE_PATH, "[REQUIRED] Path to the video file to run object detection on"}, + {MODEL_FILE_PATH, "[REQUIRED] Path to the Object Detection model to use"}, + {LABEL_PATH, "[REQUIRED] Path to the label set for the provided model file. " + "Label file is should just be an ordered list, seperated by new line."}, + {MODEL_NAME, "[REQUIRED] The name of the model being used. Accepted options: YOLO_V3_TINY, SSD_MOBILE"}, + {OUTPUT_VIDEO_FILE_PATH, "[OPTIONAL] Path to the output video file with detections added in. " + "If specified will save file to disk, else displays the output to screen"}, + {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma." + " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]." + " Defaults to CpuAcc,CpuRef"} +}; + +/* + * Checks that a particular option was specified by the user + */ +bool CheckOptionSpecified(const std::map& options, const std::string& option); + + +/* + * Retrieves the user provided option + */ +std::string GetSpecifiedOption(const std::map& options, const std::string& option); + + +/* + * Parses all the command line options provided by the user and stores in a map. 
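+ *
+ * A minimal usage sketch (illustrative only; the map template arguments are
+ * assumed to be <std::string, std::string>, and treating a non-zero return as
+ * a parse failure or --help exit is an assumption):
+ *
+ *     std::map<std::string, std::string> options;
+ *     if (ParseOptions(options, CMD_OPTIONS, argv, argc) != 0)
+ *     {
+ *         return 1; // assumed: non-zero indicates bad arguments or --help
+ *     }
+ *     std::string videoPath = GetSpecifiedOption(options, VIDEO_FILE_PATH);
+ *     if (CheckOptionSpecified(options, OUTPUT_VIDEO_FILE_PATH))
+ *     {
+ *         std::string outPath = GetSpecifiedOption(options, OUTPUT_VIDEO_FILE_PATH);
+ *     }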
+ */ +int ParseOptions(std::map& options, std::map& acceptedOptions, + char *argv[], int argc); \ No newline at end of file diff --git a/samples/ObjectDetection/include/CvVideoFileWriter.hpp b/samples/ObjectDetection/include/CvVideoFileWriter.hpp new file mode 100644 index 0000000000..ea1501b68e --- /dev/null +++ b/samples/ObjectDetection/include/CvVideoFileWriter.hpp @@ -0,0 +1,61 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "IFrameOutput.hpp" +#include + +namespace od +{ + +class CvVideoFileWriter : public IFrameOutput { +public: + /** + * @brief Default constructor. + * + * Underlying open cv video writer object will be instantiated. + */ + CvVideoFileWriter() = default; + + ~CvVideoFileWriter() override = default; + + /** + * @brief Initialises video file writer. + * + * Opens opencv writer with given params. FFMPEG backend is used. + * + * @param outputVideo path to the video file. + * @param encoding cv::CAP_PROP_FOURCC code. + * @param fps target frame rate. + * @param width target frame width. + * @param height target frame height. + * + */ + void Init(const std::string& outputVideo, int encoding, double fps, int width, int height); + + /** + * Writes frame to the file using opencv writer. + * + * @param frame data to write. + */ + void WriteFrame(std::shared_ptr& frame) override; + + /** + * Releases opencv writer. + */ + void Close() override; + + /** + * Checks if opencv writer was successfully opened. + * @return true is underlying writer is ready to be used, false otherwise. + */ + bool IsReady() const override; + +private: + cv::VideoWriter m_cvWriter{}; + bool m_ready = false; +}; +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/CvVideoFrameReader.hpp b/samples/ObjectDetection/include/CvVideoFrameReader.hpp new file mode 100644 index 0000000000..081f92620e --- /dev/null +++ b/samples/ObjectDetection/include/CvVideoFrameReader.hpp @@ -0,0 +1,108 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + + +#include "IFrameReader.hpp" +#include + +namespace od +{ + +class CvVideoFrameReader : + public IFrameReader +{ +public: + /** + * @brief Default constructor. + * + * Underlying open cv video capture object will be instantiated. + */ + CvVideoFrameReader() = default; + + ~CvVideoFrameReader() override = default; + + /** + *@brief Initialises reader to capture frames from video file. + * + * @param source path to the video file or image sequence. + * + * @throws std::runtime_error if init failed + */ + void Init(const std::string& source); + + std::shared_ptr ReadFrame() override; + + bool IsExhausted(const std::shared_ptr & frame) const override; + + /** + * Returns effective video frame width supported by the source/set by the user. + * Must be called after Init method. + * @return frame width + */ + int GetSourceWidth() const; + + /** + * Returns effective video frame height supported by the source/set by the user. + * Must be called after Init method. + * @return frame height + */ + int GetSourceHeight() const; + + /** + * Returns effective fps value supported by the source/set by the user. + * @return fps value + */ + double GetSourceFps() const; + + /** + * Will query OpenCV to convert images to RGB + * Copy is actually default behaviour, but the set function needs to be called + * in order to know whether OpenCV supports conversion from our source format. 
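+ *
+ * A typical capture loop might look like this (illustrative sketch only;
+ * videoFilePath is a placeholder):
+ * @code
+ * od::CvVideoFrameReader reader;
+ * reader.Init(videoFilePath);
+ * bool isRgb = reader.ConvertToRGB();
+ * auto frame = reader.ReadFrame();
+ * while (!reader.IsExhausted(frame))
+ * {
+ *     // consume the captured cv::Mat pointed to by frame here
+ *     frame = reader.ReadFrame();
+ * }
+ * @endcode
+ *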
+ * @return boolean, + * true: OpenCV returns RGB + * false: OpenCV returns the fourcc format from GetSourceEncoding + */ + bool ConvertToRGB(); + + /** + * Returns 4-character code of codec. + * @return codec name + */ + std::string GetSourceEncoding() const; + + /** + * Get the fourcc int from its string name. + * @return codec int + */ + int GetSourceEncodingInt() const; + + int GetFrameCount() const; + +private: + cv::VideoCapture m_capture; + + void CheckIsOpen(const std::string& source); +}; + +class CvVideoFrameReaderRgbWrapper : + public IFrameReader +{ +public: + CvVideoFrameReaderRgbWrapper() = delete; + CvVideoFrameReaderRgbWrapper(const CvVideoFrameReaderRgbWrapper& o) = delete; + CvVideoFrameReaderRgbWrapper(CvVideoFrameReaderRgbWrapper&& o) = delete; + + CvVideoFrameReaderRgbWrapper(std::unique_ptr reader); + + std::shared_ptr ReadFrame() override; + + bool IsExhausted(const std::shared_ptr& frame) const override; + +private: + std::unique_ptr m_reader; +}; + +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/CvWindowOutput.hpp b/samples/ObjectDetection/include/CvWindowOutput.hpp new file mode 100644 index 0000000000..317327ba62 --- /dev/null +++ b/samples/ObjectDetection/include/CvWindowOutput.hpp @@ -0,0 +1,53 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "IFrameOutput.hpp" +#include + +namespace od +{ + +class CvWindowOutput : public IFrameOutput { +public: + + CvWindowOutput() = default; + + ~CvWindowOutput() override = default; + + /** + * @brief Creates a named window. + * + * Uses opencv to create a window with given name. + * + * @param windowName opencv window name. + * + */ + void Init(const std::string& windowName); + + /** + * Writes frame to the window. + * + * @param frame data to write. + */ + void WriteFrame(std::shared_ptr& frame) override; + + /** + * Releases all windows. + */ + void Close() override; + + /** + * Always true. + * @return true. + */ + bool IsReady() const override; + +private: + std::string m_windowName; + +}; +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/DetectedObject.hpp b/samples/ObjectDetection/include/DetectedObject.hpp new file mode 100644 index 0000000000..315ebccf07 --- /dev/null +++ b/samples/ObjectDetection/include/DetectedObject.hpp @@ -0,0 +1,96 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "BoundingBox.hpp" + +#include +#include + +namespace od +{ +/** + * An object detection network inference result decoded data representation. + */ +class DetectedObject +{ + +public: + DetectedObject(); + + /** + * Creates detection with given parameters. 
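+ *
+ * For example, a result decoder might construct a detection like this
+ * (id, label, coordinates and score are purely illustrative values):
+ * @code
+ * od::BoundingBox box(10, 20, 100U, 50U);
+ * od::DetectedObject person(0U, "person", box, 0.87f);
+ * @endcode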
+ * + * @param id - class id + * @param label - human readable text class label + * @param boundingBox - rectangular detection coordinates + * @param score - detection score/probability + */ + DetectedObject(unsigned int id, + std::string label, + const BoundingBox& boundingBox, + float score); + + ~DetectedObject() = default; + + /** + * Get class id + * @return id + */ + unsigned int GetId() const; + + /** + * Get human readable text class label + * @return label + */ + const std::string& GetLabel() const; + + /** + * Get rectangular detection coordinates + * @return detection coordinates + */ + const BoundingBox& GetBoundingBox() const; + + /** + * Get detection score + * @return score + */ + float GetScore() const; + + /** + * Set class id + * @param[in] id - class id + */ + void SetId(unsigned int id); + + /** + * Set class label + * @param[in] label - human readable text class label + */ + void SetLabel(const std::string& label); + + /** + * Set detection coordinates + * @param[in] boundingBox detection coordinates + */ + void SetBoundingBox(const BoundingBox& boundingBox); + + /** + * Set detection score + * @param[in] score - detection score + */ + void SetScore(float score); + +private: + unsigned int m_Id; + std::string m_Label; + BoundingBox m_BoundingBox; + float m_Score; +}; + +using DetectedObjects = std::vector; + +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/IDetectionResultDecoder.hpp b/samples/ObjectDetection/include/IDetectionResultDecoder.hpp new file mode 100644 index 0000000000..c0a29df33f --- /dev/null +++ b/samples/ObjectDetection/include/IDetectionResultDecoder.hpp @@ -0,0 +1,39 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DetectedObject.hpp" +#include "Types.hpp" + +#include + +namespace od +{ + +class IDetectionResultDecoder +{ +public: + /** + * @brief Returns decoded detected objects from a network model. + * @desc Outputs 4 vectors: bounding boxes, label, probabilities & number of detections. + * This function decodes network model output and converts it to expected format. + * + * @param[in] results Vector of outputs from a model. + * @param[in] outputFrameSize Struct containing height & width of output frame that is displayed. + * @param[in] resizedFrameSize Struct containing height & width of resized input frame before padding + * and inference. + * @param[in] labels Vector of network labels. + * @param[in] detectionScoreThreshold float value for the detection score threshold. + * + * @return Vector of decoded detected objects. + */ + virtual DetectedObjects Decode(const InferenceResults& results, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector& labels) = 0; + +}; +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/IFrameOutput.hpp b/samples/ObjectDetection/include/IFrameOutput.hpp new file mode 100644 index 0000000000..c8b4fe5a47 --- /dev/null +++ b/samples/ObjectDetection/include/IFrameOutput.hpp @@ -0,0 +1,48 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace od +{ +/** + * @brief Frames output interface + * + * @tparam FrameDataT frame container data type + */ + template class IFrameOutput + { + + public: + /** + * @brief Writes frame to the selected output + * + * @param frame container + */ + virtual void WriteFrame(std::shared_ptr & frame) = 0; + + /** + * @brief Closes the frame output + */ + virtual void Close() = 0; + + /** + * @brief Checks if the frame sink is ready to write. + * + * @return True if frame sink is ready, False otherwise + */ + virtual bool IsReady() const = 0; + + /** + * @brief Default destructor + */ + virtual ~IFrameOutput() = default; + + }; + +}// namespace od diff --git a/samples/ObjectDetection/include/IFrameReader.hpp b/samples/ObjectDetection/include/IFrameReader.hpp new file mode 100644 index 0000000000..d371b7d2a5 --- /dev/null +++ b/samples/ObjectDetection/include/IFrameReader.hpp @@ -0,0 +1,45 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace od +{ +/** + * @brief Frame source reader interface + * + * @tparam FrameDataT frame container data type + */ +template class IFrameReader +{ + +public: + /** + * @brief Reads the next frame from the source + * + * @return pointer to the frame container + */ + virtual std::shared_ptr ReadFrame() = 0; + + /** + * @brief Checks if the frame source has more frames to read. + * + * @param[in] frame the pointer to the last frame captured with the ReadFrame method could be used in + * implementation specific logic to check frames source state. + * @return True if frame source was exhausted, False otherwise + */ + virtual bool IsExhausted(const std::shared_ptr & frame) const = 0; + + /** + * @brief Default destructor + */ + virtual ~IFrameReader() = default; + +}; + +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/ImageUtils.hpp b/samples/ObjectDetection/include/ImageUtils.hpp new file mode 100644 index 0000000000..07e2b839f9 --- /dev/null +++ b/samples/ObjectDetection/include/ImageUtils.hpp @@ -0,0 +1,58 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "DetectedObject.hpp" +#include "Types.hpp" + +#include + +#include + +const cv::InterpolationFlags DefaultResizeFlag = cv::INTER_NEAREST; + +/** +* @brief Function to process the decoded results from the inference, and overlay the detail onto the provided frame +* @param[in] decodedResults the decoded results from the inference output. +* @param[in] inputFrame the frame to overlay the inference output details onto. +* @param[in] labels the label set associated with the trained model used. +*/ +void AddInferenceOutputToFrame(od::DetectedObjects& decodedResults, + cv::Mat& inputFrame, + std::vector>& labels); + +/** +* @brief Function to resize a frame while keeping aspect ratio. +* +* @param[in] frame the frame we want to resize from. +* @param[out] dest the frame we want to resize into. +* @param[in] aspectRatio aspect ratio to use when resizing. +*/ +void ResizeFrame(const cv::Mat& frame, cv::Mat& dest, const od::Size& aspectRatio); + +/** +* @brief Function to pad a frame. +* @param[in] src the frame we want to pad. +* @param[out] dest the frame we want to store the result. +* @param[in] bottom padding to use on bottom of the frame. +* @param[in] right padding to use on the right of the frame. 
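+*
+* A brief sketch of how this pairs with ResizeFrame to letterbox a frame to a
+* square network input (frame is an input cv::Mat placeholder, 416x416 is only
+* an example size; ResizeWithPad below wraps this pattern):
+* @code
+* cv::Mat resized;
+* ResizeFrame(frame, resized, od::Size(416, 416));
+* cv::Mat padded;
+* PadFrame(resized, padded, 416 - resized.rows, 416 - resized.cols);
+* @endcode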
+*/ +void PadFrame(const cv::Mat& src, cv::Mat& dest, int bottom, int right); + +/** + * Resize frame to the destination size and pad if necessary to preserve initial frame aspect ratio. + * + * @param frame input frame to resize + * @param dest output frame to place resized and padded result + * @param cache operation requires intermediate data container. + * @param destSize size of the destination frame + */ +void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize); + +/** +* @brief Function to retrieve the cv::scalar color from a RGB tuple. +* @param[in] color the tuple form of the RGB color +*/ +static cv::Scalar GetScalarColorCode(std::tuple color); \ No newline at end of file diff --git a/samples/ObjectDetection/include/NetworkPipeline.hpp b/samples/ObjectDetection/include/NetworkPipeline.hpp new file mode 100644 index 0000000000..c3408b494e --- /dev/null +++ b/samples/ObjectDetection/include/NetworkPipeline.hpp @@ -0,0 +1,148 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ArmnnNetworkExecutor.hpp" +#include "YoloResultDecoder.hpp" +#include "SSDResultDecoder.hpp" +# include "ImageUtils.hpp" + +#include + +namespace od +{ +/** + * Generic object detection pipeline with 3 steps: data pre-processing, inference execution and inference + * result post-processing. + * + */ +class ObjDetectionPipeline { +public: + + /** + * Creates object detection pipeline with given network executor and decoder. + * @param executor - unique pointer to inference runner + * @param decoder - unique pointer to inference results decoder + */ + ObjDetectionPipeline(std::unique_ptr executor, + std::unique_ptr decoder); + + /** + * @brief Standard image pre-processing implementation. + * + * Re-sizes an image keeping aspect ratio, pads if necessary to fit the network input layer dimensions. + + * @param[in] frame - input image, expected data type is uint8. + * @param[out] processed - output image, data type is preserved. + */ + virtual void PreProcessing(const cv::Mat& frame, cv::Mat& processed); + + /** + * @brief Executes inference + * + * Calls inference runner provided during instance construction. + * + * @param[in] processed - input inference data. Data type should be aligned with input tensor. + * @param[out] result - raw floating point inference results. + */ + virtual void Inference(const cv::Mat& processed, InferenceResults& result); + + /** + * @brief Standard inference results post-processing implementation. + * + * Decodes inference results using decoder provided during construction. + * + * @param[in] inferenceResult - inference results to be decoded. + * @param[in] callback - a function to be called after successful inference results decoding. + */ + virtual void PostProcessing(InferenceResults& inferenceResult, + const std::function& callback); + +protected: + std::unique_ptr m_executor; + std::unique_ptr m_decoder; + Size m_inputImageSize{}; + cv::Mat m_processedFrame; +}; + +/** + * Specific to Yolo v3 tiny object detection pipeline implementation. + */ +class YoloV3Tiny: public ObjDetectionPipeline{ +public: + + /** + * Constructs object detection pipeline for Yolo v3 tiny network. + * + * Network input is expected to be uint8 or fp32. Data range [0, 255]. + * Network output is FP32. 
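+ *
+ * A sketch of how a pipeline instance might drive one frame; in the sample this
+ * object is normally built via CreatePipeline below. The threshold values, the
+ * callback signature and the executor variable (assumed to be a
+ * std::unique_ptr<od::ArmnnNetworkExecutor> built beforehand) are illustrative
+ * assumptions:
+ * @code
+ * od::IPipelinePtr pipeline = std::make_unique<od::YoloV3Tiny>(std::move(executor), 0.5f, 0.5f, 0.6f);
+ * cv::Mat processed;
+ * pipeline->PreProcessing(frame, processed);
+ * od::InferenceResults results;
+ * pipeline->Inference(processed, results);
+ * pipeline->PostProcessing(results, [&](od::DetectedObjects detections)
+ *     {
+ *         // draw or report detections here
+ *     });
+ * @endcode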
+ * + * @param executor[in] - unique pointer to inference runner + * @param NMSThreshold[in] - non max suppression threshold for decoding step + * @param ClsThreshold[in] - class probability threshold for decoding step + * @param ObjectThreshold[in] - detected object score threshold for decoding step + */ + YoloV3Tiny(std::unique_ptr executor, + float NMSThreshold, float ClsThreshold, float ObjectThreshold); + + /** + * @brief Yolo v3 tiny image pre-processing implementation. + * + * On top of the standard pre-processing, converts input data type according to the network input tensor data type. + * Supported data types: uint8 and float32. + * + * @param[in] original - input image data + * @param[out] processed - image data ready to be used for inference. + */ + void PreProcessing(const cv::Mat& original, cv::Mat& processed); + +}; + +/** + * Specific to MobileNet SSD v1 object detection pipeline implementation. + */ +class MobileNetSSDv1: public ObjDetectionPipeline { + +public: + /** + * Constructs object detection pipeline for MobileNet SSD network. + * + * Network input is expected to be uint8 or fp32. Data range [-1, 1]. + * Network output is FP32. + * + * @param[in] - unique pointer to inference runner + * @paramp[in] objectThreshold - detected object score threshold for decoding step + */ + MobileNetSSDv1(std::unique_ptr executor, + float objectThreshold); + + /** + * @brief MobileNet SSD image pre-processing implementation. + * + * On top of the standard pre-processing, converts input data type according to the network input tensor data type + * and scales input data from [0, 255] to [-1, 1] for FP32 input. + * + * Supported input data types: uint8 and float32. + * + * @param[in] original - input image data + * @param processed[out] - image data ready to be used for inference. + */ + void PreProcessing(const cv::Mat& original, cv::Mat& processed); + +}; + +using IPipelinePtr = std::unique_ptr; + +/** + * Constructs object detection pipeline based on configuration provided. + * + * @param[in] config - object detection pipeline configuration. + * + * @return unique pointer to object detection pipeline. + */ +IPipelinePtr CreatePipeline(od::ODPipelineOptions& config); + +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/NonMaxSuppression.hpp b/samples/ObjectDetection/include/NonMaxSuppression.hpp new file mode 100644 index 0000000000..204af0b528 --- /dev/null +++ b/samples/ObjectDetection/include/NonMaxSuppression.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "DetectedObject.hpp" + +#include +#include + +namespace od +{ + +/** +* @brief Returns a vector of indices corresponding to input detections kept after NMS. +* +* Perform non max suppression on input detections. Any detections with iou greater than +* given threshold are suppressed. Different detection labels are considered independently. +* +* @param[in] Vector of decoded detections. +* @param[in] Detects with IOU larger than this threshold are suppressed. +* @return Vector of indices corresponding to input detections kept after NMS. 
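+*
+* A brief usage sketch (detections is a decoded od::DetectedObjects placeholder,
+* 0.5f is an example IoU threshold, and the element type of the returned index
+* vector is not shown in this header):
+* @code
+* auto keptIndices = od::NonMaxSuppression(detections, 0.5f);
+* od::DetectedObjects kept;
+* for (auto idx : keptIndices)
+* {
+*     kept.push_back(detections[idx]);
+* }
+* @endcode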
+* +*/ +std::vector NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh); + +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/SSDResultDecoder.hpp b/samples/ObjectDetection/include/SSDResultDecoder.hpp new file mode 100644 index 0000000000..65afb8d376 --- /dev/null +++ b/samples/ObjectDetection/include/SSDResultDecoder.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Types.hpp" +#include "DetectedObject.hpp" +#include "IDetectionResultDecoder.hpp" + +namespace od +{ + +class SSDResultDecoder : public IDetectionResultDecoder +{ +public: + /** + * Constructs MobileNet ssd v1 inference results decoder. + * + * @param ObjectThreshold object score threshold + */ + SSDResultDecoder(float ObjectThreshold); + + DetectedObjects Decode(const InferenceResults& results, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector& labels) override; + +private: + float m_objectThreshold; +}; +}// namespace od \ No newline at end of file diff --git a/samples/ObjectDetection/include/Types.hpp b/samples/ObjectDetection/include/Types.hpp new file mode 100644 index 0000000000..801cff392a --- /dev/null +++ b/samples/ObjectDetection/include/Types.hpp @@ -0,0 +1,50 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace od +{ + +struct Size +{ + + uint32_t m_Width; + uint32_t m_Height; + + Size() : Size(0, 0) {} + + Size(uint32_t width, uint32_t height) : + m_Width{width}, m_Height{height} {} + + Size(const Size& other) + : Size(other.m_Width, other.m_Height) {} + + ~Size() = default; + + Size &operator=(const Size& other) = default; +}; + +struct BBoxColor +{ + std::tuple colorCode; +}; + +struct ODPipelineOptions +{ + std::string m_ModelName; + std::string m_ModelFilePath; + std::vector m_backends; +}; + +using InferenceResult = std::vector; +using InferenceResults = std::vector; +} \ No newline at end of file diff --git a/samples/ObjectDetection/include/YoloResultDecoder.hpp b/samples/ObjectDetection/include/YoloResultDecoder.hpp new file mode 100644 index 0000000000..98435e3cc9 --- /dev/null +++ b/samples/ObjectDetection/include/YoloResultDecoder.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Types.hpp" +#include "ArmnnNetworkExecutor.hpp" +#include "DetectedObject.hpp" +#include "IDetectionResultDecoder.hpp" +#include "NonMaxSuppression.hpp" + +namespace od +{ + +class YoloResultDecoder : public IDetectionResultDecoder +{ + +public: + /** + * Constructs Yolo V3 inference reuslts decoder. 
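+ *
+ * Normally constructed for the YOLO v3 tiny pipeline; direct use might look
+ * like this (the threshold values, results, frame sizes and labels are
+ * illustrative placeholders):
+ * @code
+ * od::YoloResultDecoder decoder(0.5f, 0.5f, 0.6f);
+ * od::DetectedObjects detections = decoder.Decode(results, outputFrameSize, resizedFrameSize, labels);
+ * @endcode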
+ *
+ * @param NMSThreshold non max suppression threshold
+ * @param ClsThreshold class probability threshold
+ * @param ObjectThreshold detected object score threshold
+ */
+    YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold);
+
+    DetectedObjects Decode(const InferenceResults& results,
+                           const Size& outputFrameSize,
+                           const Size& resizedFrameSize,
+                           const std::vector & labels) override;
+private:
+    float m_NmsThreshold;
+    float m_ClsThreshold;
+    float m_objectThreshold;
+
+    unsigned int m_boxElements = 4U;
+    unsigned int m_confidenceElements = 1U;
+    unsigned int m_numClasses = 80U;
+    unsigned int m_numBoxes = 2535U;
+};
+}// namespace od
\ No newline at end of file
-- 
cgit v1.2.1