diff options
author | Éanna Ó Catháin <eanna.ocathain@arm.com> | 2020-09-14 17:36:49 +0100 |
---|---|---|
committer | Jim Flynn <jim.flynn@arm.com> | 2020-09-14 18:40:01 +0000 |
commit | 919c14ef132986aa1514b2070ce6d19b5579a6ab (patch) | |
tree | 5c281e02a083768f65871cb861ab9b32ac7d8767 /samples/ObjectDetection/include | |
parent | 589e3e81a86c83456580e112978bf7a0ed5f43ac (diff) | |
download | armnn-919c14ef132986aa1514b2070ce6d19b5579a6ab.tar.gz |
MLECO-929 Add Object Detection sample application using the public ArmNN C++ API
Change-Id: I14aa1b4b726212cffbefd6687203f93f936fa872
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
Diffstat (limited to 'samples/ObjectDetection/include')
16 files changed, 1047 insertions, 0 deletions
diff --git a/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp b/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp new file mode 100644 index 0000000000..c75b68bbe1 --- /dev/null +++ b/samples/ObjectDetection/include/ArmnnNetworkExecutor.hpp @@ -0,0 +1,80 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "Types.hpp" + +#include "armnn/ArmNN.hpp" +#include "armnnTfLiteParser/ITfLiteParser.hpp" +#include "armnnUtils/DataLayoutIndexed.hpp" +#include <armnn/Logging.hpp> + +#include <string> +#include <vector> + +namespace od +{ +/** +* @brief Used to load in a network through ArmNN and run inference on it against a given backend. +* +*/ +class ArmnnNetworkExecutor +{ +private: + armnn::IRuntimePtr m_Runtime; + armnn::NetworkId m_NetId{}; + mutable InferenceResults m_OutputBuffer; + armnn::InputTensors m_InputTensors; + armnn::OutputTensors m_OutputTensors; + std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo; + + std::vector<std::string> m_outputLayerNamesList; + + armnnTfLiteParser::BindingPointInfo m_inputBindingInfo; + + void PrepareTensors(const void* inputData, const size_t dataBytes); + + template <typename Enumeration> + auto log_as_int(Enumeration value) + -> typename std::underlying_type<Enumeration>::type + { + return static_cast<typename std::underlying_type<Enumeration>::type>(value); + } + +public: + ArmnnNetworkExecutor() = delete; + + /** + * @brief Initializes the network with the given input data. Parsed through TfLiteParser and optimized for a + * given backend. + * + * Note that the output layers names order in m_outputLayerNamesList affects the order of the feature vectors + * in output of the Run method. 
+ * + * * @param[in] modelPath - Relative path to the model file + * * @param[in] backends - The list of preferred backends to run inference on + */ + ArmnnNetworkExecutor(std::string& modelPath, + std::vector<armnn::BackendId>& backends); + + /** + * @brief Returns the aspect ratio of the associated model in the order of width, height. + */ + Size GetImageAspectRatio(); + + armnn::DataType GetInputDataType() const; + + /** + * @brief Runs inference on the provided input data, and stores the results in the provided InferenceResults object. + * + * @param[in] inputData - input frame data + * @param[in] dataBytes - input data size in bytes + * @param[out] results - Vector of DetectionResult objects used to store the output result. + */ + bool Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults); + +}; +}// namespace od
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

namespace od
{
/**
* @brief Class used to store and receive bounding box location and size information
*
*/
class BoundingBox
{
public:
    /**
     * @brief Default constructor
     */
    BoundingBox();

    /**
     * @brief Constructor with parameters to configure the bounding box dimensions
     * @param[in] x int value representing the x coordinate.
     * @param[in] y int value representing the y coordinate.
     * @param[in] width unsigned int value representing the width value.
     * @param[in] height unsigned int value representing the height value.
     */
    BoundingBox(int x, int y, unsigned int width, unsigned int height);

    /**
     * @brief Constructor with a BoundingBox type parameter to copy from.
     * @param[in] other Bounding box to copy.
     */
    BoundingBox(const BoundingBox& other);

    ~BoundingBox() = default;

    /**
     * @brief Function to retrieve the X coordinate.
     */
    int GetX() const;

    /**
     * @brief Function to retrieve the Y coordinate.
     */
    int GetY() const;

    /**
     * @brief Function to retrieve the width.
     */
    unsigned int GetWidth() const;

    /**
     * @brief Function to retrieve the height.
     */
    unsigned int GetHeight() const;

    /**
     * @brief Function to set the X coordinate.
     * @param[in] x int value representing x coordinate
     */
    void SetX(int x);

    /**
     * @brief Function to set the Y coordinate.
     * @param[in] y int value representing y coordinate
     */
    void SetY(int y);

    /**
     * @brief Function to set the width of the BoundingBox.
     * @param[in] width unsigned int value representing the width
     */
    void SetWidth(unsigned int width);

    /**
     * @brief Function to set the height of the BoundingBox.
     * @param[in] height unsigned int value representing the height
     */
    void SetHeight(unsigned int height);

    /**
     * @brief Copy assignment operator.
     * @param[in] other BoundingBox to copy the dimensions from
     * @return reference to this BoundingBox
     */
    BoundingBox& operator=(const BoundingBox& other);

private:
    int m_X;
    int m_Y;
    unsigned int m_Width;
    unsigned int m_Height;
};

/**
 * @brief Get a bounding box within the limits of another bounding box
 *
 * @param[in] boxIn Input bounding box
 * @param[out] boxOut Output bounding box
 * @param[in] boxLimits Bounding box defining the limits which the output
 *                      needs to conform to.
 * @return none
 */
void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut,
                         const BoundingBox& boxLimits);

}// namespace od
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <string>
#include <map>
#include <iostream>

const std::string MODEL_NAME = "--model-name";
const std::string VIDEO_FILE_PATH = "--video-file-path";
const std::string MODEL_FILE_PATH = "--model-file-path";
const std::string OUTPUT_VIDEO_FILE_PATH = "--output-video-file-path";
const std::string LABEL_PATH = "--label-path";
const std::string PREFERRED_BACKENDS = "--preferred-backends";
const std::string HELP = "--help";

/*
 * The accepted options for this Object detection executable
 */
static std::map<std::string, std::string> CMD_OPTIONS = {
        {VIDEO_FILE_PATH, "[REQUIRED] Path to the video file to run object detection on"},
        {MODEL_FILE_PATH, "[REQUIRED] Path to the Object Detection model to use"},
        {LABEL_PATH, "[REQUIRED] Path to the label set for the provided model file. "
                     "Label file should just be an ordered list, separated by new line."},
        {MODEL_NAME, "[REQUIRED] The name of the model being used. Accepted options: YOLO_V3_TINY, SSD_MOBILE"},
        {OUTPUT_VIDEO_FILE_PATH, "[OPTIONAL] Path to the output video file with detections added in. "
                                 "If specified will save file to disk, else displays the output to screen"},
        {PREFERRED_BACKENDS, "[OPTIONAL] Takes the preferred backends in preference order, separated by comma."
                             " For example: CpuAcc,GpuAcc,CpuRef. Accepted options: [CpuAcc, CpuRef, GpuAcc]."
                             " Defaults to CpuAcc,CpuRef"}
};

/*
 * Checks that a particular option was specified by the user
 */
bool CheckOptionSpecified(const std::map<std::string, std::string>& options, const std::string& option);


/*
 * Retrieves the user provided option
 */
std::string GetSpecifiedOption(const std::map<std::string, std::string>& options, const std::string& option);


/*
 * Parses all the command line options provided by the user and stores in a map.
 */
int ParseOptions(std::map<std::string, std::string>& options, std::map<std::string, std::string>& acceptedOptions,
                 char *argv[], int argc);
\ No newline at end of file diff --git a/samples/ObjectDetection/include/CvVideoFileWriter.hpp b/samples/ObjectDetection/include/CvVideoFileWriter.hpp new file mode 100644 index 0000000000..ea1501b68e --- /dev/null +++ b/samples/ObjectDetection/include/CvVideoFileWriter.hpp @@ -0,0 +1,61 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "IFrameOutput.hpp" +#include <opencv2/opencv.hpp> + +namespace od +{ + +class CvVideoFileWriter : public IFrameOutput<cv::Mat> { +public: + /** + * @brief Default constructor. + * + * Underlying open cv video writer object will be instantiated. + */ + CvVideoFileWriter() = default; + + ~CvVideoFileWriter() override = default; + + /** + * @brief Initialises video file writer. + * + * Opens opencv writer with given params. FFMPEG backend is used. + * + * @param outputVideo path to the video file. + * @param encoding cv::CAP_PROP_FOURCC code. + * @param fps target frame rate. + * @param width target frame width. + * @param height target frame height. + * + */ + void Init(const std::string& outputVideo, int encoding, double fps, int width, int height); + + /** + * Writes frame to the file using opencv writer. + * + * @param frame data to write. + */ + void WriteFrame(std::shared_ptr<cv::Mat>& frame) override; + + /** + * Releases opencv writer. + */ + void Close() override; + + /** + * Checks if opencv writer was successfully opened. + * @return true is underlying writer is ready to be used, false otherwise. + */ + bool IsReady() const override; + +private: + cv::VideoWriter m_cvWriter{}; + bool m_ready = false; +}; +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/CvVideoFrameReader.hpp b/samples/ObjectDetection/include/CvVideoFrameReader.hpp new file mode 100644 index 0000000000..081f92620e --- /dev/null +++ b/samples/ObjectDetection/include/CvVideoFrameReader.hpp @@ -0,0 +1,108 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + + +#include "IFrameReader.hpp" +#include <opencv2/opencv.hpp> + +namespace od +{ + +class CvVideoFrameReader : + public IFrameReader<cv::Mat> +{ +public: + /** + * @brief Default constructor. + * + * Underlying open cv video capture object will be instantiated. + */ + CvVideoFrameReader() = default; + + ~CvVideoFrameReader() override = default; + + /** + *@brief Initialises reader to capture frames from video file. + * + * @param source path to the video file or image sequence. + * + * @throws std::runtime_error if init failed + */ + void Init(const std::string& source); + + std::shared_ptr <cv::Mat> ReadFrame() override; + + bool IsExhausted(const std::shared_ptr <cv::Mat>& frame) const override; + + /** + * Returns effective video frame width supported by the source/set by the user. + * Must be called after Init method. + * @return frame width + */ + int GetSourceWidth() const; + + /** + * Returns effective video frame height supported by the source/set by the user. + * Must be called after Init method. + * @return frame height + */ + int GetSourceHeight() const; + + /** + * Returns effective fps value supported by the source/set by the user. + * @return fps value + */ + double GetSourceFps() const; + + /** + * Will query OpenCV to convert images to RGB + * Copy is actually default behaviour, but the set function needs to be called + * in order to know whether OpenCV supports conversion from our source format. 
+ * @return boolean, + * true: OpenCV returns RGB + * false: OpenCV returns the fourcc format from GetSourceEncoding + */ + bool ConvertToRGB(); + + /** + * Returns 4-character code of codec. + * @return codec name + */ + std::string GetSourceEncoding() const; + + /** + * Get the fourcc int from its string name. + * @return codec int + */ + int GetSourceEncodingInt() const; + + int GetFrameCount() const; + +private: + cv::VideoCapture m_capture; + + void CheckIsOpen(const std::string& source); +}; + +class CvVideoFrameReaderRgbWrapper : + public IFrameReader<cv::Mat> +{ +public: + CvVideoFrameReaderRgbWrapper() = delete; + CvVideoFrameReaderRgbWrapper(const CvVideoFrameReaderRgbWrapper& o) = delete; + CvVideoFrameReaderRgbWrapper(CvVideoFrameReaderRgbWrapper&& o) = delete; + + CvVideoFrameReaderRgbWrapper(std::unique_ptr<od::CvVideoFrameReader> reader); + + std::shared_ptr<cv::Mat> ReadFrame() override; + + bool IsExhausted(const std::shared_ptr<cv::Mat>& frame) const override; + +private: + std::unique_ptr<od::CvVideoFrameReader> m_reader; +}; + +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/CvWindowOutput.hpp b/samples/ObjectDetection/include/CvWindowOutput.hpp new file mode 100644 index 0000000000..317327ba62 --- /dev/null +++ b/samples/ObjectDetection/include/CvWindowOutput.hpp @@ -0,0 +1,53 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "IFrameOutput.hpp" +#include <opencv2/opencv.hpp> + +namespace od +{ + +class CvWindowOutput : public IFrameOutput<cv::Mat> { +public: + + CvWindowOutput() = default; + + ~CvWindowOutput() override = default; + + /** + * @brief Creates a named window. + * + * Uses opencv to create a window with given name. + * + * @param windowName opencv window name. + * + */ + void Init(const std::string& windowName); + + /** + * Writes frame to the window. + * + * @param frame data to write. + */ + void WriteFrame(std::shared_ptr<cv::Mat>& frame) override; + + /** + * Releases all windows. + */ + void Close() override; + + /** + * Always true. + * @return true. + */ + bool IsReady() const override; + +private: + std::string m_windowName; + +}; +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/DetectedObject.hpp b/samples/ObjectDetection/include/DetectedObject.hpp new file mode 100644 index 0000000000..315ebccf07 --- /dev/null +++ b/samples/ObjectDetection/include/DetectedObject.hpp @@ -0,0 +1,96 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "BoundingBox.hpp" + +#include <string> +#include <vector> + +namespace od +{ +/** + * An object detection network inference result decoded data representation. + */ +class DetectedObject +{ + +public: + DetectedObject(); + + /** + * Creates detection with given parameters. + * + * @param id - class id + * @param label - human readable text class label + * @param boundingBox - rectangular detection coordinates + * @param score - detection score/probability + */ + DetectedObject(unsigned int id, + std::string label, + const BoundingBox& boundingBox, + float score); + + ~DetectedObject() = default; + + /** + * Get class id + * @return id + */ + unsigned int GetId() const; + + /** + * Get human readable text class label + * @return label + */ + const std::string& GetLabel() const; + + /** + * Get rectangular detection coordinates + * @return detection coordinates + */ + const BoundingBox& GetBoundingBox() const; + + /** + * Get detection score + * @return score + */ + float GetScore() const; + + /** + * Set class id + * @param[in] id - class id + */ + void SetId(unsigned int id); + + /** + * Set class label + * @param[in] label - human readable text class label + */ + void SetLabel(const std::string& label); + + /** + * Set detection coordinates + * @param[in] boundingBox detection coordinates + */ + void SetBoundingBox(const BoundingBox& boundingBox); + + /** + * Set detection score + * @param[in] score - detection score + */ + void SetScore(float score); + +private: + unsigned int m_Id; + std::string m_Label; + BoundingBox m_BoundingBox; + float m_Score; 
+}; + +using DetectedObjects = std::vector<DetectedObject>; + +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/IDetectionResultDecoder.hpp b/samples/ObjectDetection/include/IDetectionResultDecoder.hpp new file mode 100644 index 0000000000..c0a29df33f --- /dev/null +++ b/samples/ObjectDetection/include/IDetectionResultDecoder.hpp @@ -0,0 +1,39 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "DetectedObject.hpp" +#include "Types.hpp" + +#include <vector> + +namespace od +{ + +class IDetectionResultDecoder +{ +public: + /** + * @brief Returns decoded detected objects from a network model. + * @desc Outputs 4 vectors: bounding boxes, label, probabilities & number of detections. + * This function decodes network model output and converts it to expected format. + * + * @param[in] results Vector of outputs from a model. + * @param[in] outputFrameSize Struct containing height & width of output frame that is displayed. + * @param[in] resizedFrameSize Struct containing height & width of resized input frame before padding + * and inference. + * @param[in] labels Vector of network labels. + * @param[in] detectionScoreThreshold float value for the detection score threshold. + * + * @return Vector of decoded detected objects. + */ + virtual DetectedObjects Decode(const InferenceResults& results, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector<std::string>& labels) = 0; + +}; +}// namespace od
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <cstddef>
#include <memory>

namespace od
{
/**
 * @brief Frames output interface
 *
 * @tparam FrameDataT frame container data type
 */
template<typename FrameDataT> class IFrameOutput
{

public:
    /**
     * @brief Writes frame to the selected output
     *
     * @param frame container
     */
    virtual void WriteFrame(std::shared_ptr<FrameDataT>& frame) = 0;

    /**
     * @brief Closes the frame output
     */
    virtual void Close() = 0;

    /**
     * @brief Checks if the frame sink is ready to write.
     *
     * @return True if frame sink is ready, False otherwise
     */
    virtual bool IsReady() const = 0;

    /**
     * @brief Default destructor
     */
    virtual ~IFrameOutput() = default;

};

/**
 * @brief Frame source reader interface
 *
 * @tparam FrameDataT frame container data type
 */
template<typename FrameDataT> class IFrameReader
{

public:
    /**
     * @brief Reads the next frame from the source
     *
     * @return pointer to the frame container
     */
    virtual std::shared_ptr<FrameDataT> ReadFrame() = 0;

    /**
     * @brief Checks if the frame source has more frames to read.
     *
     * @param[in] frame the pointer to the last frame captured with the ReadFrame method could be used in
     *                  implementation specific logic to check frames source state.
     * @return True if frame source was exhausted, False otherwise
     */
    virtual bool IsExhausted(const std::shared_ptr<FrameDataT>& frame) const = 0;

    /**
     * @brief Default destructor
     */
    virtual ~IFrameReader() = default;

};

}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/ImageUtils.hpp b/samples/ObjectDetection/include/ImageUtils.hpp new file mode 100644 index 0000000000..07e2b839f9 --- /dev/null +++ b/samples/ObjectDetection/include/ImageUtils.hpp @@ -0,0 +1,58 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "DetectedObject.hpp" +#include "Types.hpp" + +#include <opencv2/opencv.hpp> + +#include <vector> + +const cv::InterpolationFlags DefaultResizeFlag = cv::INTER_NEAREST; + +/** +* @brief Function to process the decoded results from the inference, and overlay the detail onto the provided frame +* @param[in] decodedResults the decoded results from the inference output. +* @param[in] inputFrame the frame to overlay the inference output details onto. +* @param[in] labels the label set associated with the trained model used. +*/ +void AddInferenceOutputToFrame(od::DetectedObjects& decodedResults, + cv::Mat& inputFrame, + std::vector<std::tuple<std::string, od::BBoxColor>>& labels); + +/** +* @brief Function to resize a frame while keeping aspect ratio. +* +* @param[in] frame the frame we want to resize from. +* @param[out] dest the frame we want to resize into. +* @param[in] aspectRatio aspect ratio to use when resizing. +*/ +void ResizeFrame(const cv::Mat& frame, cv::Mat& dest, const od::Size& aspectRatio); + +/** +* @brief Function to pad a frame. +* @param[in] src the frame we want to pad. +* @param[out] dest the frame we want to store the result. +* @param[in] bottom padding to use on bottom of the frame. +* @param[in] right padding to use on the right of the frame. +*/ +void PadFrame(const cv::Mat& src, cv::Mat& dest, int bottom, int right); + +/** + * Resize frame to the destination size and pad if necessary to preserve initial frame aspect ratio. 
+ * + * @param frame input frame to resize + * @param dest output frame to place resized and padded result + * @param cache operation requires intermediate data container. + * @param destSize size of the destination frame + */ +void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize); + +/** +* @brief Function to retrieve the cv::scalar color from a RGB tuple. +* @param[in] color the tuple form of the RGB color +*/ +static cv::Scalar GetScalarColorCode(std::tuple<int, int, int> color);
\ No newline at end of file diff --git a/samples/ObjectDetection/include/NetworkPipeline.hpp b/samples/ObjectDetection/include/NetworkPipeline.hpp new file mode 100644 index 0000000000..c3408b494e --- /dev/null +++ b/samples/ObjectDetection/include/NetworkPipeline.hpp @@ -0,0 +1,148 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ArmnnNetworkExecutor.hpp" +#include "YoloResultDecoder.hpp" +#include "SSDResultDecoder.hpp" +# include "ImageUtils.hpp" + +#include <opencv2/opencv.hpp> + +namespace od +{ +/** + * Generic object detection pipeline with 3 steps: data pre-processing, inference execution and inference + * result post-processing. + * + */ +class ObjDetectionPipeline { +public: + + /** + * Creates object detection pipeline with given network executor and decoder. + * @param executor - unique pointer to inference runner + * @param decoder - unique pointer to inference results decoder + */ + ObjDetectionPipeline(std::unique_ptr<ArmnnNetworkExecutor> executor, + std::unique_ptr<IDetectionResultDecoder> decoder); + + /** + * @brief Standard image pre-processing implementation. + * + * Re-sizes an image keeping aspect ratio, pads if necessary to fit the network input layer dimensions. + + * @param[in] frame - input image, expected data type is uint8. + * @param[out] processed - output image, data type is preserved. + */ + virtual void PreProcessing(const cv::Mat& frame, cv::Mat& processed); + + /** + * @brief Executes inference + * + * Calls inference runner provided during instance construction. + * + * @param[in] processed - input inference data. Data type should be aligned with input tensor. + * @param[out] result - raw floating point inference results. + */ + virtual void Inference(const cv::Mat& processed, InferenceResults& result); + + /** + * @brief Standard inference results post-processing implementation. 
+ * + * Decodes inference results using decoder provided during construction. + * + * @param[in] inferenceResult - inference results to be decoded. + * @param[in] callback - a function to be called after successful inference results decoding. + */ + virtual void PostProcessing(InferenceResults& inferenceResult, + const std::function<void (DetectedObjects)>& callback); + +protected: + std::unique_ptr<ArmnnNetworkExecutor> m_executor; + std::unique_ptr<IDetectionResultDecoder> m_decoder; + Size m_inputImageSize{}; + cv::Mat m_processedFrame; +}; + +/** + * Specific to Yolo v3 tiny object detection pipeline implementation. + */ +class YoloV3Tiny: public ObjDetectionPipeline{ +public: + + /** + * Constructs object detection pipeline for Yolo v3 tiny network. + * + * Network input is expected to be uint8 or fp32. Data range [0, 255]. + * Network output is FP32. + * + * @param executor[in] - unique pointer to inference runner + * @param NMSThreshold[in] - non max suppression threshold for decoding step + * @param ClsThreshold[in] - class probability threshold for decoding step + * @param ObjectThreshold[in] - detected object score threshold for decoding step + */ + YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor, + float NMSThreshold, float ClsThreshold, float ObjectThreshold); + + /** + * @brief Yolo v3 tiny image pre-processing implementation. + * + * On top of the standard pre-processing, converts input data type according to the network input tensor data type. + * Supported data types: uint8 and float32. + * + * @param[in] original - input image data + * @param[out] processed - image data ready to be used for inference. + */ + void PreProcessing(const cv::Mat& original, cv::Mat& processed); + +}; + +/** + * Specific to MobileNet SSD v1 object detection pipeline implementation. + */ +class MobileNetSSDv1: public ObjDetectionPipeline { + +public: + /** + * Constructs object detection pipeline for MobileNet SSD network. 
+ * + * Network input is expected to be uint8 or fp32. Data range [-1, 1]. + * Network output is FP32. + * + * @param[in] - unique pointer to inference runner + * @paramp[in] objectThreshold - detected object score threshold for decoding step + */ + MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor, + float objectThreshold); + + /** + * @brief MobileNet SSD image pre-processing implementation. + * + * On top of the standard pre-processing, converts input data type according to the network input tensor data type + * and scales input data from [0, 255] to [-1, 1] for FP32 input. + * + * Supported input data types: uint8 and float32. + * + * @param[in] original - input image data + * @param processed[out] - image data ready to be used for inference. + */ + void PreProcessing(const cv::Mat& original, cv::Mat& processed); + +}; + +using IPipelinePtr = std::unique_ptr<od::ObjDetectionPipeline>; + +/** + * Constructs object detection pipeline based on configuration provided. + * + * @param[in] config - object detection pipeline configuration. + * + * @return unique pointer to object detection pipeline. + */ +IPipelinePtr CreatePipeline(od::ODPipelineOptions& config); + +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/NonMaxSuppression.hpp b/samples/ObjectDetection/include/NonMaxSuppression.hpp new file mode 100644 index 0000000000..204af0b528 --- /dev/null +++ b/samples/ObjectDetection/include/NonMaxSuppression.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "DetectedObject.hpp" + +#include <numeric> +#include <vector> + +namespace od +{ + +/** +* @brief Returns a vector of indices corresponding to input detections kept after NMS. +* +* Perform non max suppression on input detections. Any detections with iou greater than +* given threshold are suppressed. Different detection labels are considered independently. +* +* @param[in] Vector of decoded detections. +* @param[in] Detects with IOU larger than this threshold are suppressed. +* @return Vector of indices corresponding to input detections kept after NMS. +* +*/ +std::vector<int> NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh); + +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/SSDResultDecoder.hpp b/samples/ObjectDetection/include/SSDResultDecoder.hpp new file mode 100644 index 0000000000..65afb8d376 --- /dev/null +++ b/samples/ObjectDetection/include/SSDResultDecoder.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Types.hpp" +#include "DetectedObject.hpp" +#include "IDetectionResultDecoder.hpp" + +namespace od +{ + +class SSDResultDecoder : public IDetectionResultDecoder +{ +public: + /** + * Constructs MobileNet ssd v1 inference results decoder. + * + * @param ObjectThreshold object score threshold + */ + SSDResultDecoder(float ObjectThreshold); + + DetectedObjects Decode(const InferenceResults& results, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector<std::string>& labels) override; + +private: + float m_objectThreshold; +}; +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/include/Types.hpp b/samples/ObjectDetection/include/Types.hpp new file mode 100644 index 0000000000..801cff392a --- /dev/null +++ b/samples/ObjectDetection/include/Types.hpp @@ -0,0 +1,50 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <vector> +#include <tuple> +#include <armnn/BackendId.hpp> + +namespace od +{ + +struct Size +{ + + uint32_t m_Width; + uint32_t m_Height; + + Size() : Size(0, 0) {} + + Size(uint32_t width, uint32_t height) : + m_Width{width}, m_Height{height} {} + + Size(const Size& other) + : Size(other.m_Width, other.m_Height) {} + + ~Size() = default; + + Size &operator=(const Size& other) = default; +}; + +struct BBoxColor +{ + std::tuple<int, int, int> colorCode; +}; + +struct ODPipelineOptions +{ + std::string m_ModelName; + std::string m_ModelFilePath; + std::vector<armnn::BackendId> m_backends; +}; + +using InferenceResult = std::vector<float>; +using InferenceResults = std::vector<InferenceResult>; +}
\ No newline at end of file diff --git a/samples/ObjectDetection/include/YoloResultDecoder.hpp b/samples/ObjectDetection/include/YoloResultDecoder.hpp new file mode 100644 index 0000000000..98435e3cc9 --- /dev/null +++ b/samples/ObjectDetection/include/YoloResultDecoder.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "Types.hpp" +#include "ArmnnNetworkExecutor.hpp" +#include "DetectedObject.hpp" +#include "IDetectionResultDecoder.hpp" +#include "NonMaxSuppression.hpp" + +namespace od +{ + +class YoloResultDecoder : public IDetectionResultDecoder +{ + +public: + /** + * Constructs Yolo V3 inference reuslts decoder. + * + * @param NMSThreshold non max suppression threshold + * @param ClsThreshold class probability threshold + * @param ObjectThreshold detected object score threshold + */ + YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold); + + DetectedObjects Decode(const InferenceResults& results, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector <std::string>& labels) override; +private: + float m_NmsThreshold; + float m_ClsThreshold; + float m_objectThreshold; + + unsigned int m_boxElements = 4U; + unsigned int m_confidenceElements = 1U; + unsigned int m_numClasses = 80U; + unsigned int m_numBoxes = 2535U; +}; +}// namespace od
\ No newline at end of file |