aboutsummaryrefslogtreecommitdiff
path: root/samples/ObjectDetection/include/NetworkPipeline.hpp
blob: c3408b494ea29fe94c318c1ada231326b2b66f57 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "ArmnnNetworkExecutor.hpp"
#include "ImageUtils.hpp"
#include "SSDResultDecoder.hpp"
#include "YoloResultDecoder.hpp"

#include <opencv2/opencv.hpp>

// Include-what-you-use: this header names std::unique_ptr and std::function directly.
#include <functional>
#include <memory>

namespace od
{
/**
 * Generic object detection pipeline with 3 steps: data pre-processing, inference execution and inference
 * result post-processing.
 *
 */
class ObjDetectionPipeline {
public:

    /**
     * Creates object detection pipeline with given network executor and decoder.
     * @param[in] executor - unique pointer to inference runner
     * @param[in] decoder - unique pointer to inference results decoder
     */
    ObjDetectionPipeline(std::unique_ptr<ArmnnNetworkExecutor> executor,
                         std::unique_ptr<IDetectionResultDecoder> decoder);

    /**
     * Virtual destructor.
     *
     * Required because pipelines are owned and destroyed through a pointer to
     * this base class (see IPipelinePtr); without it, deleting a derived
     * pipeline via the base pointer is undefined behaviour.
     */
    virtual ~ObjDetectionPipeline() = default;

    /**
     * @brief Standard image pre-processing implementation.
     *
     * Re-sizes an image keeping aspect ratio, pads if necessary to fit the network input layer dimensions.
     *
     * @param[in] frame - input image, expected data type is uint8.
     * @param[out] processed - output image, data type is preserved.
     */
    virtual void PreProcessing(const cv::Mat& frame, cv::Mat& processed);

    /**
     * @brief Executes inference
     *
     * Calls inference runner provided during instance construction.
     *
     * @param[in] processed - input inference data. Data type should be aligned with input tensor.
     * @param[out] result - raw floating point inference results.
     */
    virtual void Inference(const cv::Mat& processed, InferenceResults& result);

    /**
     * @brief Standard inference results post-processing implementation.
     *
     * Decodes inference results using decoder provided during construction.
     *
     * @param[in] inferenceResult - inference results to be decoded.
     * @param[in] callback - a function to be called after successful inference results decoding.
     */
    virtual void PostProcessing(InferenceResults& inferenceResult,
                                const std::function<void (DetectedObjects)>& callback);

protected:
    std::unique_ptr<ArmnnNetworkExecutor> m_executor; // inference runner, set at construction
    std::unique_ptr<IDetectionResultDecoder> m_decoder; // result decoder, set at construction
    Size m_inputImageSize{}; // original input dimensions, recorded for post-processing
    cv::Mat m_processedFrame; // scratch frame reused between pipeline stages
};

/**
 * Specific to Yolo v3 tiny object detection pipeline implementation.
 */
class YoloV3Tiny: public ObjDetectionPipeline{
public:

    /**
     * Constructs object detection pipeline for Yolo v3 tiny network.
     *
     * Network input is expected to be uint8 or fp32. Data range [0, 255].
     * Network output is FP32.
     *
     * @param[in] executor - unique pointer to inference runner
     * @param[in] NMSThreshold - non max suppression threshold for decoding step
     * @param[in] ClsThreshold - class probability threshold for decoding step
     * @param[in] ObjectThreshold - detected object score threshold for decoding step
     */
    YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor,
               float NMSThreshold, float ClsThreshold, float ObjectThreshold);

    /**
     * @brief Yolo v3 tiny image pre-processing implementation.
     *
     * On top of the standard pre-processing, converts input data type according to the network input tensor data type.
     * Supported data types: uint8 and float32.
     *
     * @param[in] original - input image data
     * @param[out] processed - image data ready to be used for inference.
     */
    void PreProcessing(const cv::Mat& original, cv::Mat& processed) override;

};

/**
 * Specific to MobileNet SSD v1 object detection pipeline implementation.
 */
class MobileNetSSDv1: public ObjDetectionPipeline {

public:
    /**
     * Constructs object detection pipeline for MobileNet SSD network.
     *
     * Network input is expected to be uint8 or fp32. Data range [-1, 1].
     * Network output is FP32.
     *
     * @param[in] executor - unique pointer to inference runner
     * @param[in] objectThreshold - detected object score threshold for decoding step
     */
    MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor,
                   float objectThreshold);

    /**
     * @brief MobileNet SSD image pre-processing implementation.
     *
     * On top of the standard pre-processing, converts input data type according to the network input tensor data type
     * and scales input data from [0, 255] to [-1, 1] for FP32 input.
     *
     * Supported input data types: uint8 and float32.
     *
     * @param[in] original - input image data
     * @param[out] processed - image data ready to be used for inference.
     */
    void PreProcessing(const cv::Mat& original, cv::Mat& processed) override;

};

// Owning handle to any pipeline, held through the ObjDetectionPipeline base interface.
using IPipelinePtr = std::unique_ptr<od::ObjDetectionPipeline>;

/**
 * Constructs object detection pipeline based on configuration provided.
 *
 * NOTE(review): presumably selects the concrete pipeline (e.g. YoloV3Tiny or
 * MobileNetSSDv1) from the options — confirm against the implementation.
 *
 * @param[in] config - object detection pipeline configuration.
 *
 * @return unique pointer to object detection pipeline.
 */
IPipelinePtr CreatePipeline(od::ODPipelineOptions& config);

}// namespace od