author     Éanna Ó Catháin <eanna.ocathain@arm.com>    2020-09-14 17:36:49 +0100
committer  Jim Flynn <jim.flynn@arm.com>                2020-09-14 18:40:01 +0000
commit     919c14ef132986aa1514b2070ce6d19b5579a6ab (patch)
tree       5c281e02a083768f65871cb861ab9b32ac7d8767 /samples/ObjectDetection/src
parent     589e3e81a86c83456580e112978bf7a0ed5f43ac (diff)
download   armnn-919c14ef132986aa1514b2070ce6d19b5579a6ab.tar.gz
MLECO-929 Add Object Detection sample application using the public ArmNN C++ API
Change-Id: I14aa1b4b726212cffbefd6687203f93f936fa872
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
Diffstat (limited to 'samples/ObjectDetection/src')
-rw-r--r--  samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp  140
-rw-r--r--  samples/ObjectDetection/src/BoundingBox.cpp           116
-rw-r--r--  samples/ObjectDetection/src/CmdArgsParser.cpp          70
-rw-r--r--  samples/ObjectDetection/src/CvVideoFileWriter.cpp      38
-rw-r--r--  samples/ObjectDetection/src/CvVideoFrameReader.cpp     98
-rw-r--r--  samples/ObjectDetection/src/CvWindowOutput.cpp         33
-rw-r--r--  samples/ObjectDetection/src/DetectedObject.cpp         65
-rw-r--r--  samples/ObjectDetection/src/ImageUtils.cpp            126
-rw-r--r--  samples/ObjectDetection/src/Main.cpp                  160
-rw-r--r--  samples/ObjectDetection/src/NetworkPipeline.cpp       102
-rw-r--r--  samples/ObjectDetection/src/NonMaxSuppression.cpp      92
-rw-r--r--  samples/ObjectDetection/src/SSDResultDecoder.cpp       80
-rw-r--r--  samples/ObjectDetection/src/YoloResultDecoder.cpp     100
13 files changed, 1220 insertions, 0 deletions
diff --git a/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
new file mode 100644
index 0000000000..cb4c0c9f84
--- /dev/null
+++ b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
@@ -0,0 +1,140 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmnnNetworkExecutor.hpp"
+#include "Types.hpp"
+
+#include <random>
+#include <string>
+
+namespace od
+{
+
+armnn::DataType ArmnnNetworkExecutor::GetInputDataType() const
+{
+ return m_inputBindingInfo.second.GetDataType();
+}
+
+ArmnnNetworkExecutor::ArmnnNetworkExecutor(std::string& modelPath,
+ std::vector<armnn::BackendId>& preferredBackends)
+: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
+{
+ // Import the TensorFlow Lite model.
+ armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
+ armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
+
+ std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
+
+ m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);
+
+ m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);
+
+ for (const std::string& name : m_outputLayerNamesList)
+ {
+ m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
+ }
+
+ std::vector<std::string> errorMessages;
+ // Optimize the network.
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
+ preferredBackends,
+ m_Runtime->GetDeviceSpec(),
+ armnn::OptimizerOptions(),
+ armnn::Optional<std::vector<std::string>&>(errorMessages));
+
+ if (!optNet)
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
+ ARMNN_LOG(error) << errorMessage;
+ throw armnn::Exception(errorMessage);
+ }
+
+ // Load the optimized network onto the m_Runtime device
+ std::string errorMessage;
+ if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
+ {
+ ARMNN_LOG(error) << errorMessage;
+ }
+
+ // Pre-allocate memory for the outputs (their sizes never change)
+ for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
+ {
+ const armnn::DataType dataType = m_outputBindingInfo[it].second.GetDataType();
+ const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();
+
+ InferenceResult oneLayerOutResult;
+ switch (dataType)
+ {
+ case armnn::DataType::Float32:
+ {
+ oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
+ break;
+ }
+ default:
+ {
+ errorMessage = "ArmnnNetworkExecutor: unsupported output tensor data type";
+ ARMNN_LOG(error) << errorMessage << " " << log_as_int(dataType);
+ throw armnn::Exception(errorMessage);
+ }
+ }
+
+ m_OutputBuffer.emplace_back(oneLayerOutResult);
+ }
+
+ // Make the ArmNN output tensors once, after every output buffer is allocated.
+ // Building them inside the loop above would re-add the earlier outputs on each
+ // iteration for a multi-output network such as SSD.
+ m_OutputTensors.reserve(m_OutputBuffer.size());
+ for (size_t i = 0; i < m_OutputBuffer.size(); ++i)
+ {
+ m_OutputTensors.emplace_back(std::make_pair(
+ m_outputBindingInfo[i].first,
+ armnn::Tensor(m_outputBindingInfo[i].second,
+ m_OutputBuffer.at(i).data())
+ ));
+ }
+}
+
+void ArmnnNetworkExecutor::PrepareTensors(const void* inputData, const size_t dataBytes)
+{
+ assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
+ m_InputTensors.clear();
+ m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
+}
+
+bool ArmnnNetworkExecutor::Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults)
+{
+ /* Bind the caller's input buffer for this inference */
+ ARMNN_LOG(debug) << "Preparing tensors...";
+ this->PrepareTensors(inputData, dataBytes);
+ ARMNN_LOG(trace) << "Running inference...";
+
+ armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);
+
+ std::stringstream inferenceFinished;
+ inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";
+
+ ARMNN_LOG(trace) << inferenceFinished.str();
+
+ if (ret == armnn::Status::Failure)
+ {
+ ARMNN_LOG(error) << "Failed to perform inference.";
+ }
+
+ // Copy the results out; assignment replaces the contents, so reserve() is unnecessary.
+ outResults = m_OutputBuffer;
+
+ return (armnn::Status::Success == ret);
+}
+
+Size ArmnnNetworkExecutor::GetImageAspectRatio()
+{
+ const auto shape = m_inputBindingInfo.second.GetShape();
+ assert(shape.GetNumDimensions() == 4);
+ armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
+ return Size(shape[nhwc.GetWidthIndex()],
+ shape[nhwc.GetHeightIndex()]);
+}
+}// namespace od
\ No newline at end of file
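For orientation, a minimal sketch of driving the executor defined above. It assumes od::InferenceResults is a vector of per-output float buffers, as the m_OutputBuffer handling suggests; the model path and backend names are placeholders, not values from this patch.

    #include "ArmnnNetworkExecutor.hpp"

    int RunOnce(const void* frameData, size_t frameBytes)
    {
        std::string modelPath = "detector.tflite";                // placeholder path
        std::vector<armnn::BackendId> backends = {"CpuAcc", "CpuRef"};
        od::ArmnnNetworkExecutor executor(modelPath, backends);   // parses, optimizes, loads

        od::InferenceResults results;                             // one buffer per output layer
        return executor.Run(frameData, frameBytes, results) ? 0 : 1;
    }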
diff --git a/samples/ObjectDetection/src/BoundingBox.cpp b/samples/ObjectDetection/src/BoundingBox.cpp
new file mode 100644
index 0000000000..c52b0fe58a
--- /dev/null
+++ b/samples/ObjectDetection/src/BoundingBox.cpp
@@ -0,0 +1,116 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BoundingBox.hpp"
+#include <algorithm>
+namespace od
+{
+
+BoundingBox::BoundingBox() :
+ BoundingBox(0, 0, 0u, 0u) {}
+
+BoundingBox::BoundingBox(
+ int x,
+ int y,
+ unsigned int width,
+ unsigned int height) :
+ m_X(x),
+ m_Y(y),
+ m_Width(width),
+ m_Height(height) {}
+
+BoundingBox::BoundingBox(const BoundingBox& other) :
+ m_X(other.m_X),
+ m_Y(other.m_Y),
+ m_Width(other.m_Width),
+ m_Height(other.m_Height) {}
+
+int BoundingBox::GetX() const {
+ return m_X;
+}
+
+int BoundingBox::GetY() const {
+ return m_Y;
+}
+
+unsigned int BoundingBox::GetWidth() const {
+ return m_Width;
+}
+
+unsigned int BoundingBox::GetHeight() const {
+ return m_Height;
+}
+
+void BoundingBox::SetX(int x) {
+ m_X = x;
+}
+
+void BoundingBox::SetY(int y) {
+ m_Y = y;
+}
+
+void BoundingBox::SetWidth(unsigned int width) {
+ m_Width = width;
+}
+
+void BoundingBox::SetHeight(unsigned int height) {
+ m_Height = height;
+}
+
+BoundingBox& BoundingBox::operator=(const BoundingBox& other) {
+ m_X = other.m_X;
+ m_Y = other.m_Y;
+
+ m_Width = other.m_Width;
+ m_Height = other.m_Height;
+
+ return *this;
+}
+
+/* Helper function to get a "valid" bounding box */
+void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut,
+ const BoundingBox& boxLimits) {
+ boxOut.SetX(std::max(boxIn.GetX(), boxLimits.GetX()));
+ boxOut.SetY(std::max(boxIn.GetY(), boxLimits.GetY()));
+
+ /* If we have changed x and/or y, we compensate by reducing the height and/or width */
+ int boxOutWidth = static_cast<int>(boxIn.GetWidth()) -
+ std::max(0, (boxOut.GetX() - boxIn.GetX()));
+ int boxOutHeight = static_cast<int>(boxIn.GetHeight()) -
+ std::max(0, (boxOut.GetY() - boxIn.GetY()));
+
+ /* A non-positive size means there was no overlap on the x and/or y axis */
+ if (boxOutHeight <= 0 || boxOutWidth <= 0)
+ {
+ boxOut = BoundingBox{0, 0, 0, 0};
+ return;
+ }
+
+ const int limitBoxRightX = boxLimits.GetX() + static_cast<int>(boxLimits.GetWidth());
+ const int limitBoxRightY = boxLimits.GetY() + static_cast<int>(boxLimits.GetHeight());
+ const int boxRightX = boxOut.GetX() + boxOutWidth;
+ const int boxRightY = boxOut.GetY() + boxOutHeight;
+
+ if (boxRightX > limitBoxRightX)
+ {
+ boxOutWidth -= (boxRightX - limitBoxRightX);
+ }
+
+ if (boxRightY > limitBoxRightY)
+ {
+ boxOutHeight -= (boxRightY - limitBoxRightY);
+ }
+
+ /* A non-positive size here means the values overflowed (very large inputs); reject the box */
+ if (boxOutHeight <= 0 || boxOutWidth <= 0)
+ {
+ boxOut = BoundingBox{0, 0, 0, 0};
+ return;
+ }
+
+ boxOut.SetHeight(static_cast<unsigned int>(boxOutHeight));
+ boxOut.SetWidth(static_cast<unsigned int>(boxOutWidth));
+}
+}// namespace od
\ No newline at end of file
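A hand-worked example of the clipping helper above: a box hanging 10 pixels off the top-left of a 1280x720 frame loses exactly those pixels.

    od::BoundingBox in(-10, -10, 100, 100);
    od::BoundingBox limits(0, 0, 1280, 720);
    od::BoundingBox out;
    od::GetValidBoundingBox(in, out, limits);
    // out == {0, 0, 90, 90}: x and y are raised to the limits, and the width and
    // height shrink by the 10 pixels the box was shifted on each axis.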
diff --git a/samples/ObjectDetection/src/CmdArgsParser.cpp b/samples/ObjectDetection/src/CmdArgsParser.cpp
new file mode 100644
index 0000000000..b8c74bc10f
--- /dev/null
+++ b/samples/ObjectDetection/src/CmdArgsParser.cpp
@@ -0,0 +1,70 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CmdArgsParser.hpp"
+#include <iostream>
+/*
+ * Checks that a particular option was specified by the user
+ */
+bool CheckOptionSpecified(const std::map<std::string, std::string>& options, const std::string& option)
+{
+ auto it = options.find(option);
+ return it!=options.end();
+}
+
+/*
+ * Retrieves the user provided option
+ */
+std::string GetSpecifiedOption(const std::map<std::string, std::string>& options, const std::string& option)
+{
+ if (CheckOptionSpecified(options, option)){
+ return options.at(option);
+ }
+ else
+ {
+ throw std::invalid_argument("Required option: " + option + " not defined.");
+ }
+}
+
+/*
+ * Parses all the command line options provided by the user and stores in a map.
+ */
+int ParseOptions(std::map<std::string, std::string>& options, std::map<std::string, std::string>& acceptedOptions,
+ char *argv[], int argc)
+{
+ for (int i = 1; i < argc; ++i)
+ {
+ std::string currentOption = std::string(argv[i]);
+ auto it = acceptedOptions.find(currentOption);
+ if (it != acceptedOptions.end())
+ {
+ if (i + 1 < argc && std::string(argv[i + 1]).rfind("--", 0) != 0)
+ {
+ std::string value = argv[++i];
+ options.insert({it->first, value});
+ }
+ else if (std::string(argv[i]) == HELP)
+ {
+ std::cout << "Available options" << std::endl;
+ for (auto & acceptedOption : acceptedOptions)
+ {
+ std::cout << acceptedOption.first << " : " << acceptedOption.second << std::endl;
+ }
+ return 2;
+ }
+ else
+ {
+ std::cerr << std::string(argv[i]) << " option requires one argument." << std::endl;
+ return 1;
+ }
+ }
+ else
+ {
+ std::cerr << "Unrecognised option: " << std::string(argv[i]) << std::endl;
+ return 1;
+ }
+ }
+ return 0;
+}
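To illustrate the contract of ParseOptions: it fills the options map and returns 0 on success, 1 for an unknown option or a missing value, and 2 after printing help. A sketch with hypothetical option strings (the real keys, such as HELP, are defined in CmdArgsParser.hpp):

    std::map<std::string, std::string> accepted{
        {"--video-file-path", "Path to the input video"},    // hypothetical key
        {"--model-file-path", "Path to the .tflite model"}   // hypothetical key
    };
    std::map<std::string, std::string> parsed;
    const char* args[] = {"prog", "--video-file-path", "in.mp4"};
    int rc = ParseOptions(parsed, accepted, const_cast<char**>(args), 3);
    // rc == 0 and parsed["--video-file-path"] == "in.mp4"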
diff --git a/samples/ObjectDetection/src/CvVideoFileWriter.cpp b/samples/ObjectDetection/src/CvVideoFileWriter.cpp
new file mode 100644
index 0000000000..ab80b95d49
--- /dev/null
+++ b/samples/ObjectDetection/src/CvVideoFileWriter.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvVideoFileWriter.hpp"
+
+namespace od
+{
+
+void CvVideoFileWriter::Init(const std::string& outputVideo, int encoding, double fps, int width, int height)
+{
+ m_ready = m_cvWriter.open(outputVideo, cv::CAP_FFMPEG,
+ encoding,
+ fps,
+ cv::Size(width, height), true);
+}
+
+
+void CvVideoFileWriter::WriteFrame(std::shared_ptr<cv::Mat>& frame)
+{
+ if(m_cvWriter.isOpened())
+ {
+ cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR);
+ m_cvWriter.write(*frame);
+ }
+}
+
+bool CvVideoFileWriter::IsReady() const
+{
+ return m_ready;
+}
+
+void CvVideoFileWriter::Close()
+{
+ m_cvWriter.release();
+}
+}// namespace od
diff --git a/samples/ObjectDetection/src/CvVideoFrameReader.cpp b/samples/ObjectDetection/src/CvVideoFrameReader.cpp
new file mode 100644
index 0000000000..09b5050973
--- /dev/null
+++ b/samples/ObjectDetection/src/CvVideoFrameReader.cpp
@@ -0,0 +1,98 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "CvVideoFrameReader.hpp"
+
+namespace od
+{
+
+std::shared_ptr<cv::Mat> CvVideoFrameReader::ReadFrame()
+{
+ // opencv copies data anyway
+ cv::Mat captureFrame;
+ m_capture.read(captureFrame);
+ return std::make_shared<cv::Mat>(std::move(captureFrame));
+}
+
+bool CvVideoFrameReader::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const
+{
+ assert(frame!=nullptr);
+ return frame->empty();
+}
+
+void CvVideoFrameReader::CheckIsOpen(const std::string& source)
+{
+ if (!m_capture.isOpened())
+ {
+ throw std::runtime_error("Failed to open video capture for the source = " + source);
+ }
+}
+
+void CvVideoFrameReader::Init(const std::string& source)
+{
+ m_capture.open(source);
+ CheckIsOpen(source);
+}
+
+int CvVideoFrameReader::GetSourceWidth() const
+{
+ return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_WIDTH)));
+}
+
+int CvVideoFrameReader::GetSourceHeight() const
+{
+ return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_HEIGHT)));
+}
+
+double CvVideoFrameReader::GetSourceFps() const
+{
+ return m_capture.get(cv::CAP_PROP_FPS);
+}
+
+bool CvVideoFrameReader::ConvertToRGB()
+{
+ m_capture.set(cv::CAP_PROP_CONVERT_RGB, 1.0);
+ return static_cast<bool>(m_capture.get(cv::CAP_PROP_CONVERT_RGB));
+}
+
+std::string CvVideoFrameReader::GetSourceEncoding() const
+{
+ char fourccStr[5];
+ auto fourcc = (int)m_capture.get(cv::CAP_PROP_FOURCC);
+ sprintf(fourccStr,"%c%c%c%c",fourcc & 0xFF, (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, (fourcc >> 24) & 0xFF);
+ return fourccStr;
+}
+
+int CvVideoFrameReader::GetSourceEncodingInt() const
+{
+ return (int)m_capture.get(cv::CAP_PROP_FOURCC);
+}
+
+int CvVideoFrameReader::GetFrameCount() const
+{
+ return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_COUNT)));
+}
+
+std::shared_ptr<cv::Mat> CvVideoFrameReaderRgbWrapper::ReadFrame()
+{
+ auto framePtr = m_reader->ReadFrame();
+ if (!IsExhausted(framePtr))
+ {
+ cv::cvtColor(*framePtr, *framePtr, cv::COLOR_BGR2RGB);
+ }
+ return framePtr;
+}
+
+bool CvVideoFrameReaderRgbWrapper::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const
+{
+ return m_reader->IsExhausted(frame);
+}
+
+CvVideoFrameReaderRgbWrapper::CvVideoFrameReaderRgbWrapper(std::unique_ptr<od::CvVideoFrameReader> reader):
+ m_reader(std::move(reader))
+{}
+
+}// namespace od
\ No newline at end of file
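As a worked example of the FOURCC unpacking in GetSourceEncoding() above: OpenCV packs the four code characters low byte first, so an 'h264' stream arrives as the integer 0x34363268 and the byte shifts recover the characters in order.

    int fourcc = cv::VideoWriter::fourcc('h', '2', '6', '4');   // == 0x34363268
    char s[5];
    snprintf(s, sizeof(s), "%c%c%c%c", fourcc & 0xFF, (fourcc >> 8) & 0xFF,
             (fourcc >> 16) & 0xFF, (fourcc >> 24) & 0xFF);
    // s == "h264"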
diff --git a/samples/ObjectDetection/src/CvWindowOutput.cpp b/samples/ObjectDetection/src/CvWindowOutput.cpp
new file mode 100644
index 0000000000..a32147b19a
--- /dev/null
+++ b/samples/ObjectDetection/src/CvWindowOutput.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvWindowOutput.hpp"
+
+namespace od
+{
+
+void CvWindowOutput::Init(const std::string& windowName)
+{
+ m_windowName = windowName;
+ cv::namedWindow(m_windowName, cv::WINDOW_AUTOSIZE);
+}
+
+void CvWindowOutput::WriteFrame(std::shared_ptr<cv::Mat>& frame)
+{
+ cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR);
+ cv::imshow(m_windowName, *frame);
+ cv::waitKey(30);
+}
+
+void CvWindowOutput::Close()
+{
+ cv::destroyWindow(m_windowName);
+}
+
+bool CvWindowOutput::IsReady() const
+{
+ return true;
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/DetectedObject.cpp b/samples/ObjectDetection/src/DetectedObject.cpp
new file mode 100644
index 0000000000..95f99a07d6
--- /dev/null
+++ b/samples/ObjectDetection/src/DetectedObject.cpp
@@ -0,0 +1,65 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DetectedObject.hpp"
+
+namespace od
+{
+
+DetectedObject::DetectedObject() :
+ DetectedObject(0u, "", BoundingBox(), 0u)
+{}
+
+DetectedObject::DetectedObject(
+ unsigned int id,
+ std::string label,
+ const BoundingBox &boundingBox,
+ float score) :
+ m_Id(id),
+ m_Label(std::move(label)),
+ m_BoundingBox(boundingBox),
+ m_Score(score)
+{}
+
+unsigned int DetectedObject::GetId() const
+{
+ return m_Id;
+}
+
+const std::string &DetectedObject::GetLabel() const
+{
+ return m_Label;
+}
+
+const BoundingBox &DetectedObject::GetBoundingBox() const
+{
+ return m_BoundingBox;
+}
+
+float DetectedObject::GetScore() const
+{
+ return m_Score;
+}
+
+void DetectedObject::SetId(unsigned int id)
+{
+ m_Id = id;
+}
+
+void DetectedObject::SetLabel(const std::string &label)
+{
+ m_Label = label;
+}
+
+void DetectedObject::SetBoundingBox(const BoundingBox &boundingBox)
+{
+ m_BoundingBox = boundingBox;
+}
+
+void DetectedObject::SetScore(float score)
+{
+ m_Score = score;
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/ImageUtils.cpp b/samples/ObjectDetection/src/ImageUtils.cpp
new file mode 100644
index 0000000000..9a3ed17b63
--- /dev/null
+++ b/samples/ObjectDetection/src/ImageUtils.cpp
@@ -0,0 +1,126 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ImageUtils.hpp"
+#include "BoundingBox.hpp"
+#include "Types.hpp"
+
+#include <armnn/Logging.hpp>
+
+static cv::Scalar GetScalarColorCode(std::tuple<int, int, int> color)
+{
+ return cv::Scalar(std::get<0>(color), std::get<1>(color), std::get<2>(color));
+}
+
+void AddInferenceOutputToFrame(od::DetectedObjects& decodedResults, cv::Mat& inputFrame,
+ std::vector<std::tuple<std::string, od::BBoxColor>>& labels)
+{
+ for(const od::DetectedObject& object : decodedResults)
+ {
+ int confidence = static_cast<int>(object.GetScore() * 100);
+ int baseline = 0;
+ std::string textStr;
+ std::tuple<int, int, int> colorCode(255, 0, 0); //red
+
+ if (labels.size() > object.GetId())
+ {
+ auto label = labels[object.GetId()];
+ textStr = std::get<0>(label) + " - " + std::to_string(confidence) + "%";
+ colorCode = std::get<1>(label).colorCode;
+ }
+ else
+ {
+ textStr = std::to_string(object.GetId()) + " - " + std::to_string(confidence) + "%";
+ }
+
+ cv::Size textSize = cv::getTextSize(textStr, cv::FONT_HERSHEY_DUPLEX, 1.0, 1, &baseline);
+
+ const od::BoundingBox& bbox = object.GetBoundingBox();
+
+ if (bbox.GetX() + bbox.GetWidth() > inputFrame.cols)
+ {
+ cv::Rect r(bbox.GetX(), bbox.GetY(), inputFrame.cols - bbox.GetX(), bbox.GetHeight());
+
+ cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+ }
+ else if (bbox.GetY() + bbox.GetHeight() > inputFrame.rows)
+ {
+ cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), inputFrame.rows - bbox.GetY());
+
+ cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+ }
+ else
+ {
+ cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), bbox.GetHeight());
+
+ cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+ }
+
+ int textBoxY = std::max(0, bbox.GetY() - textSize.height);
+
+ cv::Rect text(bbox.GetX(), textBoxY, textSize.width, textSize.height);
+
+ cv::rectangle(inputFrame, text, GetScalarColorCode(colorCode), -1);
+
+ cv::Scalar color;
+
+ if(std::get<0>(colorCode) + std::get<1>(colorCode) + std::get<2>(colorCode) > 127)
+ {
+ color = cv::Scalar::all(0);
+ }
+ else
+ {
+ color = cv::Scalar::all(255);
+ }
+
+ cv::putText(inputFrame,
+ textStr,
+ cv::Point(bbox.GetX(), textBoxY + textSize.height - textSize.height / 3),
+ cv::FONT_HERSHEY_DUPLEX,
+ 0.5,
+ color,
+ 1);
+ }
+}
+
+
+void ResizeFrame(const cv::Mat& frame, cv::Mat& dest, const od::Size& aspectRatio)
+{
+ if(&dest != &frame)
+ {
+ double longEdgeInput = std::max(frame.rows, frame.cols);
+ double longEdgeOutput = std::max(aspectRatio.m_Width, aspectRatio.m_Height);
+ const double resizeFactor = longEdgeOutput/longEdgeInput;
+ cv::resize(frame, dest, cv::Size(0, 0), resizeFactor, resizeFactor, DefaultResizeFlag);
+ }
+ else
+ {
+ const std::string warningMessage{"Resize was not performed because resized frame references the source frame."};
+ ARMNN_LOG(warning) << warningMessage;
+ }
+}
+
+/** Pad a frame with zeros (add rows and columns to the end) */
+void PadFrame(const cv::Mat& src, cv::Mat& dest, const int bottom, const int right)
+{
+ if(&dest != &src)
+ {
+ cv::copyMakeBorder(src, dest, 0, bottom, 0, right, cv::BORDER_CONSTANT);
+ }
+ else
+ {
+ const std::string warningMessage
+ {
+ "Pad was not performed because destination frame references the source frame."
+ };
+ ARMNN_LOG(warning) << warningMessage;
+ }
+}
+
+void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize)
+{
+ ResizeFrame(frame, cache, destSize);
+ PadFrame(cache, dest, destSize.m_Height - cache.rows, destSize.m_Width - cache.cols);
+}
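To make the resize-then-pad flow concrete: for a 1920x1080 frame and a 300x300 network input, the long edges give a resize factor of 300/1920 = 0.15625, so the frame scales to roughly 300x169 and PadFrame zero-fills the missing rows. A sketch, assuming od::Size takes (width, height) as ArmnnNetworkExecutor::GetImageAspectRatio() suggests:

    cv::Mat frame = cv::Mat::zeros(1080, 1920, CV_8UC3);   // stand-in input frame
    cv::Mat cache, processed;
    ResizeWithPad(frame, processed, cache, od::Size(300, 300));
    // cache is ~300x169 (aspect ratio preserved); processed is 300x300 with the
    // bottom ~131 rows zero-padded.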
diff --git a/samples/ObjectDetection/src/Main.cpp b/samples/ObjectDetection/src/Main.cpp
new file mode 100644
index 0000000000..10abb65cce
--- /dev/null
+++ b/samples/ObjectDetection/src/Main.cpp
@@ -0,0 +1,160 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvVideoFrameReader.hpp"
+#include "CvWindowOutput.hpp"
+#include "CvVideoFileWriter.hpp"
+#include "NetworkPipeline.hpp"
+#include "CmdArgsParser.hpp"
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <random>
+
+/*
+ * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector
+ */
+std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends)
+{
+ std::vector<armnn::BackendId> backends;
+ std::stringstream ss(preferredBackends);
+
+ while(ss.good())
+ {
+ std::string backend;
+ std::getline( ss, backend, ',' );
+ backends.emplace_back(backend);
+ }
+ return backends;
+}
+
+/*
+ * Assigns a color to each label in the label set
+ */
+std::vector<std::tuple<std::string, od::BBoxColor>> AssignColourToLabel(const std::string& pathToLabelFile)
+{
+ std::ifstream in(pathToLabelFile);
+ std::vector<std::tuple<std::string, od::BBoxColor>> labels;
+
+ std::string str;
+ std::default_random_engine generator;
+ std::uniform_int_distribution<int> distribution(0,255);
+
+ while (std::getline(in, str))
+ {
+ if(!str.empty())
+ {
+ od::BBoxColor c{
+ .colorCode = std::make_tuple(distribution(generator),
+ distribution(generator),
+ distribution(generator))
+ };
+ auto bboxInfo = std::make_tuple (str, c);
+
+ labels.emplace_back(bboxInfo);
+ }
+ }
+ return labels;
+}
+
+std::tuple<std::unique_ptr<od::IFrameReader<cv::Mat>>,
+ std::unique_ptr<od::IFrameOutput<cv::Mat>>>
+ GetFrameSourceAndSink(const std::map<std::string, std::string>& options) {
+
+ std::unique_ptr<od::IFrameReader<cv::Mat>> readerPtr;
+
+ std::unique_ptr<od::CvVideoFrameReader> reader = std::make_unique<od::CvVideoFrameReader>();
+ reader->Init(GetSpecifiedOption(options, VIDEO_FILE_PATH));
+
+ auto enc = reader->GetSourceEncodingInt();
+ auto fps = reader->GetSourceFps();
+ auto w = reader->GetSourceWidth();
+ auto h = reader->GetSourceHeight();
+ if (!reader->ConvertToRGB())
+ {
+ readerPtr = std::make_unique<od::CvVideoFrameReaderRgbWrapper>(std::move(reader));
+ }
+ else
+ {
+ readerPtr = std::move(reader);
+ }
+
+ if(CheckOptionSpecified(options, OUTPUT_VIDEO_FILE_PATH))
+ {
+ std::string outputVideo = GetSpecifiedOption(options, OUTPUT_VIDEO_FILE_PATH);
+ auto writer = std::make_unique<od::CvVideoFileWriter>();
+ writer->Init(outputVideo, enc, fps, w, h);
+
+ return std::make_tuple<>(std::move(readerPtr), std::move(writer));
+ }
+ else
+ {
+ auto writer = std::make_unique<od::CvWindowOutput>();
+ writer->Init("Processed Video");
+ return std::make_tuple<>(std::move(readerPtr), std::move(writer));
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ std::map<std::string, std::string> options;
+
+ int result = ParseOptions(options, CMD_OPTIONS, argv, argc);
+ if (result != 0)
+ {
+ return result;
+ }
+
+ // Create the network options
+ od::ODPipelineOptions pipelineOptions;
+ pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);
+ pipelineOptions.m_ModelName = GetSpecifiedOption(options, MODEL_NAME);
+
+ if(CheckOptionSpecified(options, PREFERRED_BACKENDS))
+ {
+ pipelineOptions.m_backends = GetPreferredBackendList((GetSpecifiedOption(options, PREFERRED_BACKENDS)));
+ }
+ else
+ {
+ pipelineOptions.m_backends = {"CpuAcc", "CpuRef"};
+ }
+
+ auto labels = AssignColourToLabel(GetSpecifiedOption(options, LABEL_PATH));
+
+ od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(pipelineOptions);
+
+ auto inputAndOutput = GetFrameSourceAndSink(options);
+ std::unique_ptr<od::IFrameReader<cv::Mat>> reader = std::move(std::get<0>(inputAndOutput));
+ std::unique_ptr<od::IFrameOutput<cv::Mat>> sink = std::move(std::get<1>(inputAndOutput));
+
+ if (!sink->IsReady())
+ {
+ std::cerr << "Failed to open video writer.";
+ return 1;
+ }
+
+ od::InferenceResults results;
+
+ std::shared_ptr<cv::Mat> frame = reader->ReadFrame();
+
+ // Pre-allocate frames
+ cv::Mat processed;
+
+ while(!reader->IsExhausted(frame))
+ {
+ objectDetectionPipeline->PreProcessing(*frame, processed);
+ objectDetectionPipeline->Inference(processed, results);
+ objectDetectionPipeline->PostProcessing(results,
+ [&frame, &labels](od::DetectedObjects detects) -> void {
+ AddInferenceOutputToFrame(detects, *frame, labels);
+ });
+
+ sink->WriteFrame(frame);
+ frame = reader->ReadFrame();
+ }
+ sink->Close();
+ return 0;
+}
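For instance, GetPreferredBackendList above turns the comma-separated backend option into the ordered vector handed to Optimize():

    auto backends = GetPreferredBackendList("GpuAcc,CpuAcc,CpuRef");
    // backends == {"GpuAcc", "CpuAcc", "CpuRef"}, tried in that order; when the
    // option is absent, main() falls back to {"CpuAcc", "CpuRef"}.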
diff --git a/samples/ObjectDetection/src/NetworkPipeline.cpp b/samples/ObjectDetection/src/NetworkPipeline.cpp
new file mode 100644
index 0000000000..7f05882fc4
--- /dev/null
+++ b/samples/ObjectDetection/src/NetworkPipeline.cpp
@@ -0,0 +1,102 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NetworkPipeline.hpp"
+#include "ImageUtils.hpp"
+
+namespace od
+{
+
+ObjDetectionPipeline::ObjDetectionPipeline(std::unique_ptr<ArmnnNetworkExecutor> executor,
+ std::unique_ptr<IDetectionResultDecoder> decoder) :
+ m_executor(std::move(executor)),
+ m_decoder(std::move(decoder)){}
+
+void od::ObjDetectionPipeline::Inference(const cv::Mat& processed, InferenceResults& result)
+{
+ m_executor->Run(processed.data, processed.total() * processed.elemSize(), result);
+}
+
+void ObjDetectionPipeline::PostProcessing(InferenceResults& inferenceResult,
+ const std::function<void (DetectedObjects)>& callback)
+{
+ DetectedObjects detections = m_decoder->Decode(inferenceResult, m_inputImageSize,
+ m_executor->GetImageAspectRatio(), {});
+ if (callback)
+ {
+ callback(detections);
+ }
+}
+
+void ObjDetectionPipeline::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+ m_inputImageSize.m_Height = frame.rows;
+ m_inputImageSize.m_Width = frame.cols;
+ ResizeWithPad(frame, processed, m_processedFrame, m_executor->GetImageAspectRatio());
+}
+
+MobileNetSSDv1::MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor,
+ float objectThreshold) :
+ ObjDetectionPipeline(std::move(executor),
+ std::make_unique<SSDResultDecoder>(objectThreshold))
+{}
+
+void MobileNetSSDv1::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+ ObjDetectionPipeline::PreProcessing(frame, processed);
+ if (m_executor->GetInputDataType() == armnn::DataType::Float32)
+ {
+ // [0, 255] => [-1.0, 1.0]
+ processed.convertTo(processed, CV_32FC3, 1 / 127.5, -1);
+ }
+}
+
+YoloV3Tiny::YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor,
+ float NMSThreshold, float ClsThreshold, float ObjectThreshold) :
+ ObjDetectionPipeline(std::move(executor),
+ std::make_unique<YoloResultDecoder>(NMSThreshold,
+ ClsThreshold,
+ ObjectThreshold))
+{}
+
+void YoloV3Tiny::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+ ObjDetectionPipeline::PreProcessing(frame, processed);
+ if (m_executor->GetInputDataType() == armnn::DataType::Float32)
+ {
+ processed.convertTo(processed, CV_32FC3);
+ }
+}
+
+IPipelinePtr CreatePipeline(od::ODPipelineOptions& config)
+{
+ auto executor = std::make_unique<od::ArmnnNetworkExecutor>(config.m_ModelFilePath, config.m_backends);
+
+ if (config.m_ModelName == "SSD_MOBILE")
+ {
+ float detectionThreshold = 0.6f;
+
+ return std::make_unique<od::MobileNetSSDv1>(std::move(executor),
+ detectionThreshold
+ );
+ }
+ else if (config.m_ModelName == "YOLO_V3_TINY")
+ {
+ float NMSThreshold = 0.6f;
+ float ClsThreshold = 0.6f;
+ float ObjectThreshold = 0.6f;
+ return std::make_unique<od::YoloV3Tiny>(std::move(executor),
+ NMSThreshold,
+ ClsThreshold,
+ ObjectThreshold
+ );
+ }
+ else
+ {
+ throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " supplied by user.");
+ }
+
+}
+}// namespace od
\ No newline at end of file
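A quick check of the MobileNetSSDv1 input scaling above, which computes x / 127.5 - 1 per channel:

    cv::Mat px(1, 1, CV_8UC3, cv::Scalar(0, 128, 255));
    px.convertTo(px, CV_32FC3, 1 / 127.5, -1);
    // channels become -1.0, ~0.004 and 1.0: the [0, 255] byte range lands in
    // [-1.0, 1.0], as the comment in MobileNetSSDv1::PreProcessing states.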
diff --git a/samples/ObjectDetection/src/NonMaxSuppression.cpp b/samples/ObjectDetection/src/NonMaxSuppression.cpp
new file mode 100644
index 0000000000..7bcd9045a5
--- /dev/null
+++ b/samples/ObjectDetection/src/NonMaxSuppression.cpp
@@ -0,0 +1,92 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "NonMaxSuppression.hpp"
+
+#include <algorithm>
+
+namespace od
+{
+
+static std::vector<unsigned int> GenerateRangeK(unsigned int k)
+{
+ std::vector<unsigned int> range(k);
+ std::iota(range.begin(), range.end(), 0);
+ return range;
+}
+
+
+/**
+* @brief Returns the intersection over union for two bounding boxes
+*
+* @param[in] detect1 First detection containing a bounding box.
+* @param[in] detect2 Second detection containing a bounding box.
+* @return Calculated intersection over union.
+*
+*/
+static double IntersectionOverUnion(DetectedObject& detect1, DetectedObject& detect2)
+{
+ uint32_t area1 = (detect1.GetBoundingBox().GetHeight() * detect1.GetBoundingBox().GetWidth());
+ uint32_t area2 = (detect2.GetBoundingBox().GetHeight() * detect2.GetBoundingBox().GetWidth());
+
+ float yMinIntersection = std::max(detect1.GetBoundingBox().GetY(), detect2.GetBoundingBox().GetY());
+ float xMinIntersection = std::max(detect1.GetBoundingBox().GetX(), detect2.GetBoundingBox().GetX());
+
+ float yMaxIntersection = std::min(detect1.GetBoundingBox().GetY() + detect1.GetBoundingBox().GetHeight(),
+ detect2.GetBoundingBox().GetY() + detect2.GetBoundingBox().GetHeight());
+ float xMaxIntersection = std::min(detect1.GetBoundingBox().GetX() + detect1.GetBoundingBox().GetWidth(),
+ detect2.GetBoundingBox().GetX() + detect2.GetBoundingBox().GetWidth());
+
+ double areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
+ std::max(xMaxIntersection - xMinIntersection, 0.0f);
+ double areaUnion = area1 + area2 - areaIntersection;
+
+ return areaIntersection / areaUnion;
+}
+
+std::vector<int> NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh)
+{
+ // Sort indices of detections from highest score to lowest.
+ std::vector<unsigned int> sortedIndicies = GenerateRangeK(inputDetections.size());
+ std::sort(sortedIndicies.begin(), sortedIndicies.end(),
+ [&inputDetections](int idx1, int idx2)
+ {
+ return inputDetections[idx1].GetScore() > inputDetections[idx2].GetScore();
+ });
+
+ std::vector<bool> visited(inputDetections.size(), false);
+ std::vector<int> outputIndiciesAfterNMS;
+
+ for (int i=0; i < inputDetections.size(); ++i)
+ {
+ // Each new unvisited detect should be kept.
+ if (!visited[sortedIndicies[i]])
+ {
+ outputIndiciesAfterNMS.emplace_back(sortedIndicies[i]);
+ visited[sortedIndicies[i]] = true;
+ }
+
+ // Look for detections to suppress.
+ for (int j=i+1; j<inputDetections.size(); ++j)
+ {
+ // Skip if already kept or suppressed.
+ if (!visited[sortedIndicies[j]])
+ {
+ // Detects must have the same label to be suppressed.
+ if (inputDetections[sortedIndicies[j]].GetLabel() == inputDetections[sortedIndicies[i]].GetLabel())
+ {
+ auto iou = IntersectionOverUnion(inputDetections[sortedIndicies[i]],
+ inputDetections[sortedIndicies[j]]);
+ if (iou > iouThresh)
+ {
+ visited[sortedIndicies[j]] = true;
+ }
+ }
+ }
+ }
+ }
+ return outputIndiciesAfterNMS;
+}
+
+} // namespace od
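A hand-worked value for IntersectionOverUnion above: two 100x100 boxes offset by (50, 50) intersect in a 50x50 region, so IoU = 2500 / (10000 + 10000 - 2500) ≈ 0.143. A sketch, assuming od::DetectedObjects is a std::vector as its use above suggests:

    od::DetectedObject a(0, "person", od::BoundingBox(0, 0, 100, 100), 0.9f);
    od::DetectedObject b(0, "person", od::BoundingBox(50, 50, 100, 100), 0.8f);
    od::DetectedObjects detections{a, b};
    auto kept = od::NonMaxSuppression(detections, 0.6f);
    // kept == {0, 1}: the IoU of ~0.143 is below the 0.6 threshold, so neither
    // detection suppresses the other.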
diff --git a/samples/ObjectDetection/src/SSDResultDecoder.cpp b/samples/ObjectDetection/src/SSDResultDecoder.cpp
new file mode 100644
index 0000000000..a3319212e5
--- /dev/null
+++ b/samples/ObjectDetection/src/SSDResultDecoder.cpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SSDResultDecoder.hpp"
+
+#include <cassert>
+#include <algorithm>
+#include <cmath>
+#include <stdexcept>
+namespace od
+{
+
+DetectedObjects SSDResultDecoder::Decode(const InferenceResults& networkResults,
+ const Size& outputFrameSize,
+ const Size& resizedFrameSize,
+ const std::vector<std::string>& labels)
+{
+ // SSD network outputs 4 tensors: bounding boxes, labels, probabilities, number of detections.
+ if (networkResults.size() != 4)
+ {
+ throw std::runtime_error("Number of outputs from SSD model doesn't equal 4");
+ }
+
+ DetectedObjects detectedObjects;
+ const int numDetections = static_cast<int>(std::lround(networkResults[3][0]));
+
+ double longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height);
+ double longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height);
+ const double resizeFactor = longEdgeOutput/longEdgeInput;
+
+ for (int i=0; i<numDetections; ++i)
+ {
+ if (networkResults[2][i] > m_objectThreshold)
+ {
+ DetectedObject detectedObject;
+ detectedObject.SetScore(networkResults[2][i]);
+ auto classId = std::lround(networkResults[1][i]);
+
+ if (classId < labels.size())
+ {
+ detectedObject.SetLabel(labels[classId]);
+ }
+ else
+ {
+ detectedObject.SetLabel(std::to_string(classId));
+ }
+ detectedObject.SetId(classId);
+
+ // Convert SSD bbox outputs (ratios of image size) to pixel values.
+ double topLeftY = networkResults[0][i*4 + 0] * resizedFrameSize.m_Height;
+ double topLeftX = networkResults[0][i*4 + 1] * resizedFrameSize.m_Width;
+ double botRightY = networkResults[0][i*4 + 2] * resizedFrameSize.m_Height;
+ double botRightX = networkResults[0][i*4 + 3] * resizedFrameSize.m_Width;
+
+ // Scale the coordinates to output frame size.
+ topLeftY *= resizeFactor;
+ topLeftX *= resizeFactor;
+ botRightY *= resizeFactor;
+ botRightX *= resizeFactor;
+
+ assert(botRightX > topLeftX);
+ assert(botRightY > topLeftY);
+
+ // Internal BoundingBox stores box top left x,y and width, height.
+ detectedObject.SetBoundingBox({static_cast<int>(std::round(topLeftX)),
+ static_cast<int>(std::round(topLeftY)),
+ static_cast<unsigned int>(botRightX - topLeftX),
+ static_cast<unsigned int>(botRightY - topLeftY)});
+
+ detectedObjects.emplace_back(detectedObject);
+ }
+ }
+ return detectedObjects;
+}
+
+SSDResultDecoder::SSDResultDecoder(float ObjectThreshold) : m_objectThreshold(ObjectThreshold) {}
+
+}// namespace od
\ No newline at end of file
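To illustrate the coordinate math in Decode above: with a 300x300 resizedFrameSize and a 1920x1080 outputFrameSize, resizeFactor = 1920/300 = 6.4, and a detection reported as ratios (yMin, xMin, yMax, xMax) = (0.1, 0.2, 0.5, 0.6) works out by hand as:

    // topLeftY  = 0.1 * 300 * 6.4 = 192     topLeftX  = 0.2 * 300 * 6.4 = 384
    // botRightY = 0.5 * 300 * 6.4 = 960     botRightX = 0.6 * 300 * 6.4 = 1152
    // => BoundingBox{x=384, y=192, width=768, height=768} in output-frame pixels.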
diff --git a/samples/ObjectDetection/src/YoloResultDecoder.cpp b/samples/ObjectDetection/src/YoloResultDecoder.cpp
new file mode 100644
index 0000000000..ffbf7cb68d
--- /dev/null
+++ b/samples/ObjectDetection/src/YoloResultDecoder.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "YoloResultDecoder.hpp"
+
+#include "NonMaxSuppression.hpp"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace od
+{
+
+DetectedObjects YoloResultDecoder::Decode(const InferenceResults& networkResults,
+ const Size& outputFrameSize,
+ const Size& resizedFrameSize,
+ const std::vector<std::string>& labels)
+{
+
+ // Yolo v3 network outputs 1 tensor
+ if (networkResults.size() != 1)
+ {
+ throw std::runtime_error("Number of outputs from Yolo model doesn't equal 1");
+ }
+ auto element_step = m_boxElements + m_confidenceElements + m_numClasses;
+
+ float longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height);
+ float longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height);
+ const float resizeFactor = longEdgeOutput/longEdgeInput;
+
+ DetectedObjects detectedObjects;
+ DetectedObjects resultsAfterNMS;
+
+ for (const InferenceResult& result : networkResults)
+ {
+ for (unsigned int i = 0; i < m_numBoxes; ++i)
+ {
+ const float* cur_box = &result[i * element_step];
+ // Objectness score
+ if (cur_box[4] > m_objectThreshold)
+ {
+ for (unsigned int classIndex = 0; classIndex < m_numClasses; ++classIndex)
+ {
+ const float class_prob = cur_box[4] * cur_box[5 + classIndex];
+
+ // class confidence
+
+ if (class_prob > m_ClsThreshold)
+ {
+ DetectedObject detectedObject;
+
+ detectedObject.SetScore(class_prob);
+
+ float topLeftX = cur_box[0] * resizeFactor;
+ float topLeftY = cur_box[1] * resizeFactor;
+ float botRightX = cur_box[2] * resizeFactor;
+ float botRightY = cur_box[3] * resizeFactor;
+
+ assert(botRightX > topLeftX);
+ assert(botRightY > topLeftY);
+
+ detectedObject.SetBoundingBox({static_cast<int>(topLeftX),
+ static_cast<int>(topLeftY),
+ static_cast<unsigned int>(botRightX-topLeftX),
+ static_cast<unsigned int>(botRightY-topLeftY)});
+ if(labels.size() > classIndex)
+ {
+ detectedObject.SetLabel(labels.at(classIndex));
+ }
+ else
+ {
+ detectedObject.SetLabel(std::to_string(classIndex));
+ }
+ detectedObject.SetId(classIndex);
+ detectedObjects.emplace_back(detectedObject);
+ }
+ }
+ }
+ }
+
+ std::vector<int> keepIndiciesAfterNMS = od::NonMaxSuppression(detectedObjects, m_NmsThreshold);
+
+ for (const int ind: keepIndiciesAfterNMS)
+ {
+ resultsAfterNMS.emplace_back(detectedObjects[ind]);
+ }
+ }
+
+ return resultsAfterNMS;
+}
+
+YoloResultDecoder::YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold)
+ : m_NmsThreshold(NMSThreshold), m_ClsThreshold(ClsThreshold), m_objectThreshold(ObjectThreshold) {}
+
+}// namespace od
+
+
+
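On the output layout Decode above walks: each of the m_numBoxes rows holds m_boxElements box coordinates, then m_confidenceElements objectness value(s), then m_numClasses class scores, and the reported confidence is objectness times class score. A sketch of reading one row, assuming a COCO-style 4 + 1 + 80 = 85 floats per row (the real member values live in YoloResultDecoder.hpp):

    float PersonScore(const float* row)      // row = &result[i * 85]
    {
        // row[0..3] are topLeftX, topLeftY, botRightX, botRightY in input pixels
        float objectness = row[4];
        return objectness * row[5 + 0];      // class 0 is "person" in COCO ordering
    }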