author     Éanna Ó Catháin <eanna.ocathain@arm.com>    2020-09-14 17:36:49 +0100
committer  Jim Flynn <jim.flynn@arm.com>                2020-09-14 18:40:01 +0000
commit     919c14ef132986aa1514b2070ce6d19b5579a6ab (patch)
tree       5c281e02a083768f65871cb861ab9b32ac7d8767 /samples/ObjectDetection/src
parent     589e3e81a86c83456580e112978bf7a0ed5f43ac (diff)
download   armnn-919c14ef132986aa1514b2070ce6d19b5579a6ab.tar.gz
MLECO-929 Add Object Detection sample application using the public ArmNN C++ API
Change-Id: I14aa1b4b726212cffbefd6687203f93f936fa872
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
Diffstat (limited to 'samples/ObjectDetection/src')
-rw-r--r--  samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp  140
-rw-r--r--  samples/ObjectDetection/src/BoundingBox.cpp           116
-rw-r--r--  samples/ObjectDetection/src/CmdArgsParser.cpp          70
-rw-r--r--  samples/ObjectDetection/src/CvVideoFileWriter.cpp      38
-rw-r--r--  samples/ObjectDetection/src/CvVideoFrameReader.cpp     98
-rw-r--r--  samples/ObjectDetection/src/CvWindowOutput.cpp         33
-rw-r--r--  samples/ObjectDetection/src/DetectedObject.cpp         65
-rw-r--r--  samples/ObjectDetection/src/ImageUtils.cpp            126
-rw-r--r--  samples/ObjectDetection/src/Main.cpp                  160
-rw-r--r--  samples/ObjectDetection/src/NetworkPipeline.cpp       102
-rw-r--r--  samples/ObjectDetection/src/NonMaxSuppression.cpp      92
-rw-r--r--  samples/ObjectDetection/src/SSDResultDecoder.cpp       80
-rw-r--r--  samples/ObjectDetection/src/YoloResultDecoder.cpp     100
13 files changed, 1220 insertions, 0 deletions
diff --git a/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
new file mode 100644
index 0000000000..cb4c0c9f84
--- /dev/null
+++ b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp
@@ -0,0 +1,140 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ArmnnNetworkExecutor.hpp"
+#include "Types.hpp"
+
+#include <random>
+#include <string>
+
+namespace od
+{
+
+armnn::DataType ArmnnNetworkExecutor::GetInputDataType() const
+{
+ return m_inputBindingInfo.second.GetDataType();
+}
+
+ArmnnNetworkExecutor::ArmnnNetworkExecutor(std::string& modelPath,
+ std::vector<armnn::BackendId>& preferredBackends)
+: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
+{
+ // Import the TensorFlow Lite model.
+ armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
+ armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
+
+ std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);
+
+ m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);
+
+ m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);
+
+ for (const std::string& name : m_outputLayerNamesList)
+ {
+ m_outputBindingInfo.push_back(parser->GetNetworkOutputBindingInfo(0, name));
+ }
+
+ std::vector<std::string> errorMessages;
+ // Optimize the network.
+ armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
+ preferredBackends,
+ m_Runtime->GetDeviceSpec(),
+ armnn::OptimizerOptions(),
+ armnn::Optional<std::vector<std::string>&>(errorMessages));
+
+ if (!optNet)
+ {
+ const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
+ ARMNN_LOG(error) << errorMessage;
+ throw armnn::Exception(errorMessage);
+ }
+
+ // Load the optimized network onto the m_Runtime device
+ std::string errorMessage;
+ if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
+ {
+ ARMNN_LOG(error) << errorMessage;
+ }
+
+ // Pre-allocate memory for the outputs (their sizes never change)
+ for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
+ {
+ const armnn::DataType dataType = m_outputBindingInfo[it].second.GetDataType();
+ const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();
+
+ InferenceResult oneLayerOutResult;
+ switch (dataType)
+ {
+ case armnn::DataType::Float32:
+ {
+ oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
+ break;
+ }
+ default:
+ {
+ errorMessage = "ArmnnNetworkExecutor: unsupported output tensor data type";
+ ARMNN_LOG(error) << errorMessage << " " << log_as_int(dataType);
+ throw armnn::Exception(errorMessage);
+ }
+ }
+
+ m_OutputBuffer.emplace_back(oneLayerOutResult);
+ }
+
+ // Make the ArmNN output tensors once, after every output buffer is allocated.
+ // Building them inside the loop above would re-add the earlier outputs on each
+ // iteration for a multi-output network such as SSD.
+ m_OutputTensors.reserve(m_OutputBuffer.size());
+ for (size_t i = 0; i < m_OutputBuffer.size(); ++i)
+ {
+ m_OutputTensors.emplace_back(std::make_pair(
+ m_outputBindingInfo[i].first,
+ armnn::Tensor(m_outputBindingInfo[i].second,
+ m_OutputBuffer.at(i).data())
+ ));
+ }
+}
+
+void ArmnnNetworkExecutor::PrepareTensors(const void* inputData, const size_t dataBytes)
+{
+ assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
+ m_InputTensors.clear();
+ m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
+}
+
+bool ArmnnNetworkExecutor::Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults)
+{
+ /* Bind the caller's input buffer for this inference */
+ ARMNN_LOG(debug) << "Preparing tensors...";
+ this->PrepareTensors(inputData, dataBytes);
+ ARMNN_LOG(trace) << "Running inference...";
+
+ armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);
+
+ std::stringstream inferenceFinished;
+ inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";
+
+ ARMNN_LOG(trace) << inferenceFinished.str();
+
+ if (ret == armnn::Status::Failure)
+ {
+ ARMNN_LOG(error) << "Failed to perform inference.";
+ }
+
+ // Copy the results out; assignment replaces the contents, so reserve() is unnecessary.
+ outResults = m_OutputBuffer;
+
+ return (armnn::Status::Success == ret);
+}
+
+Size ArmnnNetworkExecutor::GetImageAspectRatio()
+{
+ const auto shape = m_inputBindingInfo.second.GetShape();
+ assert(shape.GetNumDimensions() == 4);
+ armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
+ return Size(shape[nhwc.GetWidthIndex()],
+ shape[nhwc.GetHeightIndex()]);
+}
+}// namespace od
\ No newline at end of file
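For orientation, a minimal sketch of driving the executor defined above. It assumes od::InferenceResults is a vector of per-output float buffers, as the m_OutputBuffer handling suggests; the model path and backend names are placeholders, not values from this patch.

    #include "ArmnnNetworkExecutor.hpp"

    int RunOnce(const void* frameData, size_t frameBytes)
    {
        std::string modelPath = "detector.tflite";                // placeholder path
        std::vector<armnn::BackendId> backends = {"CpuAcc", "CpuRef"};
        od::ArmnnNetworkExecutor executor(modelPath, backends);   // parses, optimizes, loads

        od::InferenceResults results;                             // one buffer per output layer
        return executor.Run(frameData, frameBytes, results) ? 0 : 1;
    }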
diff --git a/samples/ObjectDetection/src/BoundingBox.cpp b/samples/ObjectDetection/src/BoundingBox.cpp
new file mode 100644
index 0000000000..c52b0fe58a
--- /dev/null
+++ b/samples/ObjectDetection/src/BoundingBox.cpp
@@ -0,0 +1,116 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "BoundingBox.hpp"
+#include <algorithm>
+namespace od
+{
+
+BoundingBox::BoundingBox() :
+ BoundingBox(0, 0, 0u, 0u) {}
+
+BoundingBox::BoundingBox(
+ int x,
+ int y,
+ unsigned int width,
+ unsigned int height) :
+ m_X(x),
+ m_Y(y),
+ m_Width(width),
+ m_Height(height) {}
+
+BoundingBox::BoundingBox(const BoundingBox& other) :
+ m_X(other.m_X),
+ m_Y(other.m_Y),
+ m_Width(other.m_Width),
+ m_Height(other.m_Height) {}
+
+int BoundingBox::GetX() const {
+ return m_X;
+}
+
+int BoundingBox::GetY() const {
+ return m_Y;
+}
+
+unsigned int BoundingBox::GetWidth() const {
+ return m_Width;
+}
+
+unsigned int BoundingBox::GetHeight() const {
+ return m_Height;
+}
+
+void BoundingBox::SetX(int x) {
+ m_X = x;
+}
+
+void BoundingBox::SetY(int y) {
+ m_Y = y;
+}
+
+void BoundingBox::SetWidth(unsigned int width) {
+ m_Width = width;
+}
+
+void BoundingBox::SetHeight(unsigned int height) {
+ m_Height = height;
+}
+
+BoundingBox& BoundingBox::operator=(const BoundingBox& other) {
+ m_X = other.m_X;
+ m_Y = other.m_Y;
+
+ m_Width = other.m_Width;
+ m_Height = other.m_Height;
+
+ return *this;
+}
+
+/* Helper function to get a "valid" bounding box */
+void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut,
+ const BoundingBox& boxLimits) {
+ boxOut.SetX(std::max(boxIn.GetX(), boxLimits.GetX()));
+ boxOut.SetY(std::max(boxIn.GetY(), boxLimits.GetY()));
+
+ /* If we have changed x and/or y, we compensate by reducing the height and/or width */
+ int boxOutWidth = static_cast<int>(boxIn.GetWidth()) -
+ std::max(0, (boxOut.GetX() - boxIn.GetX()));
+ int boxOutHeight = static_cast<int>(boxIn.GetHeight()) -
+ std::max(0, (boxOut.GetY() - boxIn.GetY()));
+
+ /* A non-positive size means there was no overlap on the x and/or y axis */
+ if (boxOutHeight <= 0 || boxOutWidth <= 0)
+ {
+ boxOut = BoundingBox{0, 0, 0, 0};
+ return;
+ }
+
+ const int limitBoxRightX = boxLimits.GetX() + static_cast<int>(boxLimits.GetWidth());
+ const int limitBoxRightY = boxLimits.GetY() + static_cast<int>(boxLimits.GetHeight());
+ const int boxRightX = boxOut.GetX() + boxOutWidth;
+ const int boxRightY = boxOut.GetY() + boxOutHeight;
+
+ if (boxRightX > limitBoxRightX)
+ {
+ boxOutWidth -= (boxRightX - limitBoxRightX);
+ }
+
+ if (boxRightY > limitBoxRightY)
+ {
+ boxOutHeight -= (boxRightY - limitBoxRightY);
+ }
+
+ /* A non-positive size here means the values overflowed (very large inputs); reject the box */
+ if (boxOutHeight <= 0 || boxOutWidth <= 0)
+ {
+ boxOut = BoundingBox{0, 0, 0, 0};
+ return;
+ }
+
+ boxOut.SetHeight(static_cast<unsigned int>(boxOutHeight));
+ boxOut.SetWidth(static_cast<unsigned int>(boxOutWidth));
+}
+}// namespace od
\ No newline at end of file
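A hand-worked example of the clipping helper above: a box hanging 10 pixels off the top-left of a 1280x720 frame loses exactly those pixels.

    od::BoundingBox in(-10, -10, 100, 100);
    od::BoundingBox limits(0, 0, 1280, 720);
    od::BoundingBox out;
    od::GetValidBoundingBox(in, out, limits);
    // out == {0, 0, 90, 90}: x and y are raised to the limits, and the width and
    // height shrink by the 10 pixels the box was shifted on each axis.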
diff --git a/samples/ObjectDetection/src/CmdArgsParser.cpp b/samples/ObjectDetection/src/CmdArgsParser.cpp
new file mode 100644
index 0000000000..b8c74bc10f
--- /dev/null
+++ b/samples/ObjectDetection/src/CmdArgsParser.cpp
@@ -0,0 +1,70 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CmdArgsParser.hpp"
+#include <iostream>
+/*
+ * Checks that a particular option was specified by the user
+ */
+bool CheckOptionSpecified(const std::map<std::string, std::string>& options, const std::string& option)
+{
+ auto it = options.find(option);
+ return it!=options.end();
+}
+
+/*
+ * Retrieves the user provided option
+ */
+std::string GetSpecifiedOption(const std::map<std::string, std::string>& options, const std::string& option)
+{
+ if (CheckOptionSpecified(options, option)){
+ return options.at(option);
+ }
+ else
+ {
+ throw std::invalid_argument("Required option: " + option + " not defined.");
+ }
+}
+
+/*
+ * Parses all the command line options provided by the user and stores in a map.
+ */
+int ParseOptions(std::map<std::string, std::string>& options, std::map<std::string, std::string>& acceptedOptions,
+ char *argv[], int argc)
+{
+ for (int i = 1; i < argc; ++i)
+ {
+ std::string currentOption = std::string(argv[i]);
+ auto it = acceptedOptions.find(currentOption);
+ if (it != acceptedOptions.end())
+ {
+ if (i + 1 < argc && std::string(argv[i + 1]).rfind("--", 0) != 0)
+ {
+ std::string value = argv[++i];
+ options.insert({it->first, value});
+ }
+ else if (std::string(argv[i]) == HELP)
+ {
+ std::cout << "Available options" << std::endl;
+ for (auto & acceptedOption : acceptedOptions)
+ {
+ std::cout << acceptedOption.first << " : " << acceptedOption.second << std::endl;
+ }
+ return 2;
+ }
+ else
+ {
+ std::cerr << std::string(argv[i]) << " option requires one argument." << std::endl;
+ return 1;
+ }
+ }
+ else
+ {
+ std::cerr << "Unrecognised option: " << std::string(argv[i]) << std::endl;
+ return 1;
+ }
+ }
+ return 0;
+}
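To illustrate the contract of ParseOptions: it fills the options map and returns 0 on success, 1 for an unknown option or a missing value, and 2 after printing help. A sketch with hypothetical option strings (the real keys, such as HELP, are defined in CmdArgsParser.hpp):

    std::map<std::string, std::string> accepted{
        {"--video-file-path", "Path to the input video"},    // hypothetical key
        {"--model-file-path", "Path to the .tflite model"}   // hypothetical key
    };
    std::map<std::string, std::string> parsed;
    const char* args[] = {"prog", "--video-file-path", "in.mp4"};
    int rc = ParseOptions(parsed, accepted, const_cast<char**>(args), 3);
    // rc == 0 and parsed["--video-file-path"] == "in.mp4"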
diff --git a/samples/ObjectDetection/src/CvVideoFileWriter.cpp b/samples/ObjectDetection/src/CvVideoFileWriter.cpp
new file mode 100644
index 0000000000..ab80b95d49
--- /dev/null
+++ b/samples/ObjectDetection/src/CvVideoFileWriter.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvVideoFileWriter.hpp"
+
+namespace od
+{
+
+void CvVideoFileWriter::Init(const std::string& outputVideo, int encoding, double fps, int width, int height)
+{
+ m_ready = m_cvWriter.open(outputVideo, cv::CAP_FFMPEG,
+ encoding,
+ fps,
+ cv::Size(width, height), true);
+}
+
+
+void CvVideoFileWriter::WriteFrame(std::shared_ptr<cv::Mat>& frame)
+{
+ if(m_cvWriter.isOpened())
+ {
+ cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR);
+ m_cvWriter.write(*frame);
+ }
+}
+
+bool CvVideoFileWriter::IsReady() const
+{
+ return m_ready;
+}
+
+void CvVideoFileWriter::Close()
+{
+ m_cvWriter.release();
+}
+}// namespace od
diff --git a/samples/ObjectDetection/src/CvVideoFrameReader.cpp b/samples/ObjectDetection/src/CvVideoFrameReader.cpp
new file mode 100644
index 0000000000..09b5050973
--- /dev/null
+++ b/samples/ObjectDetection/src/CvVideoFrameReader.cpp
@@ -0,0 +1,98 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "CvVideoFrameReader.hpp"
+
+namespace od
+{
+
+std::shared_ptr<cv::Mat> CvVideoFrameReader::ReadFrame()
+{
+ // opencv copies data anyway
+ cv::Mat captureFrame;
+ m_capture.read(captureFrame);
+ return std::make_shared<cv::Mat>(std::move(captureFrame));
+}
+
+bool CvVideoFrameReader::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const
+{
+ assert(frame!=nullptr);
+ return frame->empty();
+}
+
+void CvVideoFrameReader::CheckIsOpen(const std::string& source)
+{
+ if (!m_capture.isOpened())
+ {
+ throw std::runtime_error("Failed to open video capture for the source = " + source);
+ }
+}
+
+void CvVideoFrameReader::Init(const std::string& source)
+{
+ m_capture.open(source);
+ CheckIsOpen(source);
+}
+
+int CvVideoFrameReader::GetSourceWidth() const
+{
+ return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_WIDTH)));
+}
+
+int CvVideoFrameReader::GetSourceHeight() const
+{
+ return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_HEIGHT)));
+}
+
+double CvVideoFrameReader::GetSourceFps() const
+{
+ return m_capture.get(cv::CAP_PROP_FPS);
+}
+
+bool CvVideoFrameReader::ConvertToRGB()
+{
+ m_capture.set(cv::CAP_PROP_CONVERT_RGB, 1.0);
+ return static_cast<bool>(m_capture.get(cv::CAP_PROP_CONVERT_RGB));
+}
+
+std::string CvVideoFrameReader::GetSourceEncoding() const
+{
+ char fourccStr[5];
+ auto fourcc = (int)m_capture.get(cv::CAP_PROP_FOURCC);
+ sprintf(fourccStr,"%c%c%c%c",fourcc & 0xFF, (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, (fourcc >> 24) & 0xFF);
+ return fourccStr;
+}
+
+int CvVideoFrameReader::GetSourceEncodingInt() const
+{
+ return (int)m_capture.get(cv::CAP_PROP_FOURCC);
+}
+
+int CvVideoFrameReader::GetFrameCount() const
+{
+ return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_COUNT)));
+}
+
+std::shared_ptr<cv::Mat> CvVideoFrameReaderRgbWrapper::ReadFrame()
+{
+ auto framePtr = m_reader->ReadFrame();
+ if (!IsExhausted(framePtr))
+ {
+ cv::cvtColor(*framePtr, *framePtr, cv::COLOR_BGR2RGB);
+ }
+ return framePtr;
+}
+
+bool CvVideoFrameReaderRgbWrapper::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const
+{
+ return m_reader->IsExhausted(frame);
+}
+
+CvVideoFrameReaderRgbWrapper::CvVideoFrameReaderRgbWrapper(std::unique_ptr<od::CvVideoFrameReader> reader):
+ m_reader(std::move(reader))
+{}
+
+}// namespace od
\ No newline at end of file
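As a worked example of the FOURCC unpacking in GetSourceEncoding() above: OpenCV packs the four code characters low byte first, so an 'h264' stream arrives as the integer 0x34363268 and the byte shifts recover the characters in order.

    int fourcc = cv::VideoWriter::fourcc('h', '2', '6', '4');   // == 0x34363268
    char s[5];
    snprintf(s, sizeof(s), "%c%c%c%c", fourcc & 0xFF, (fourcc >> 8) & 0xFF,
             (fourcc >> 16) & 0xFF, (fourcc >> 24) & 0xFF);
    // s == "h264"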
diff --git a/samples/ObjectDetection/src/CvWindowOutput.cpp b/samples/ObjectDetection/src/CvWindowOutput.cpp
new file mode 100644
index 0000000000..a32147b19a
--- /dev/null
+++ b/samples/ObjectDetection/src/CvWindowOutput.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvWindowOutput.hpp"
+
+namespace od
+{
+
+void CvWindowOutput::Init(const std::string& windowName)
+{
+ m_windowName = windowName;
+ cv::namedWindow(m_windowName, cv::WINDOW_AUTOSIZE);
+}
+
+void CvWindowOutput::WriteFrame(std::shared_ptr<cv::Mat>& frame)
+{
+ cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR);
+ cv::imshow(m_windowName, *frame);
+ cv::waitKey(30);
+}
+
+void CvWindowOutput::Close()
+{
+ cv::destroyWindow(m_windowName);
+}
+
+bool CvWindowOutput::IsReady() const
+{
+ return true;
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/DetectedObject.cpp b/samples/ObjectDetection/src/DetectedObject.cpp
new file mode 100644
index 0000000000..95f99a07d6
--- /dev/null
+++ b/samples/ObjectDetection/src/DetectedObject.cpp
@@ -0,0 +1,65 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "DetectedObject.hpp"
+
+namespace od
+{
+
+DetectedObject::DetectedObject() :
+ DetectedObject(0u, "", BoundingBox(), 0u)
+{}
+
+DetectedObject::DetectedObject(
+ unsigned int id,
+ std::string label,
+ const BoundingBox &boundingBox,
+ float score) :
+ m_Id(id),
+ m_Label(std::move(label)),
+ m_BoundingBox(boundingBox),
+ m_Score(score)
+{}
+
+unsigned int DetectedObject::GetId() const
+{
+ return m_Id;
+}
+
+const std::string &DetectedObject::GetLabel() const
+{
+ return m_Label;
+}
+
+const BoundingBox &DetectedObject::GetBoundingBox() const
+{
+ return m_BoundingBox;
+}
+
+float DetectedObject::GetScore() const
+{
+ return m_Score;
+}
+
+void DetectedObject::SetId(unsigned int id)
+{
+ m_Id = id;
+}
+
+void DetectedObject::SetLabel(const std::string &label)
+{
+ m_Label = label;
+}
+
+void DetectedObject::SetBoundingBox(const BoundingBox &boundingBox)
+{
+ m_BoundingBox = boundingBox;
+}
+
+void DetectedObject::SetScore(float score)
+{
+ m_Score = score;
+}
+}// namespace od
\ No newline at end of file
diff --git a/samples/ObjectDetection/src/ImageUtils.cpp b/samples/ObjectDetection/src/ImageUtils.cpp
new file mode 100644
index 0000000000..9a3ed17b63
--- /dev/null
+++ b/samples/ObjectDetection/src/ImageUtils.cpp
@@ -0,0 +1,126 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ImageUtils.hpp"
+#include "BoundingBox.hpp"
+#include "Types.hpp"
+
+#include <armnn/Logging.hpp>
+
+static cv::Scalar GetScalarColorCode(std::tuple<int, int, int> color)
+{
+ return cv::Scalar(std::get<0>(color), std::get<1>(color), std::get<2>(color));
+}
+
+void AddInferenceOutputToFrame(od::DetectedObjects& decodedResults, cv::Mat& inputFrame,
+ std::vector<std::tuple<std::string, od::BBoxColor>>& labels)
+{
+ for(const od::DetectedObject& object : decodedResults)
+ {
+ int confidence = static_cast<int>(object.GetScore() * 100);
+ int baseline = 0;
+ std::string textStr;
+ std::tuple<int, int, int> colorCode(255, 0, 0); //red
+
+ if (labels.size() > object.GetId())
+ {
+ auto label = labels[object.GetId()];
+ textStr = std::get<0>(label) + " - " + std::to_string(confidence) + "%";
+ colorCode = std::get<1>(label).colorCode;
+ }
+ else
+ {
+ textStr = std::to_string(object.GetId()) + " - " + std::to_string(confidence) + "%";
+ }
+
+ cv::Size textSize = cv::getTextSize(textStr, cv::FONT_HERSHEY_DUPLEX, 1.0, 1, &baseline);
+
+ const od::BoundingBox& bbox = object.GetBoundingBox();
+
+ if (bbox.GetX() + bbox.GetWidth() > inputFrame.cols)
+ {
+ cv::Rect r(bbox.GetX(), bbox.GetY(), inputFrame.cols - bbox.GetX(), bbox.GetHeight());
+
+ cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+ }
+ else if (bbox.GetY() + bbox.GetHeight() > inputFrame.rows)
+ {
+ cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), inputFrame.rows - bbox.GetY());
+
+ cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+ }
+ else
+ {
+ cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), bbox.GetHeight());
+
+ cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0);
+ }
+
+ int textBoxY = std::max(0, bbox.GetY() - textSize.height);
+
+ cv::Rect text(bbox.GetX(), textBoxY, textSize.width, textSize.height);
+
+ cv::rectangle(inputFrame, text, GetScalarColorCode(colorCode), -1);
+
+ cv::Scalar color;
+
+ if(std::get<0>(colorCode) + std::get<1>(colorCode) + std::get<2>(colorCode) > 127)
+ {
+ color = cv::Scalar::all(0);
+ }
+ else
+ {
+ color = cv::Scalar::all(255);
+ }
+
+ cv::putText(inputFrame,
+ textStr,
+ cv::Point(bbox.GetX(), textBoxY + textSize.height - textSize.height / 3),
+ cv::FONT_HERSHEY_DUPLEX,
+ 0.5,
+ color,
+ 1);
+ }
+}
+
+
+void ResizeFrame(const cv::Mat& frame, cv::Mat& dest, const od::Size& aspectRatio)
+{
+ if(&dest != &frame)
+ {
+ double longEdgeInput = std::max(frame.rows, frame.cols);
+ double longEdgeOutput = std::max(aspectRatio.m_Width, aspectRatio.m_Height);
+ const double resizeFactor = longEdgeOutput/longEdgeInput;
+ cv::resize(frame, dest, cv::Size(0, 0), resizeFactor, resizeFactor, DefaultResizeFlag);
+ }
+ else
+ {
+ const std::string warningMessage{"Resize was not performed because resized frame references the source frame."};
+ ARMNN_LOG(warning) << warningMessage;
+ }
+}
+
+/** Pad a frame with zeros (add rows and columns to the end) */
+void PadFrame(const cv::Mat& src, cv::Mat& dest, const int bottom, const int right)
+{
+ if(&dest != &src)
+ {
+ cv::copyMakeBorder(src, dest, 0, bottom, 0, right, cv::BORDER_CONSTANT);
+ }
+ else
+ {
+ const std::string warningMessage
+ {
+ "Pad was not performed because destination frame references the source frame."
+ };
+ ARMNN_LOG(warning) << warningMessage;
+ }
+}
+
+void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize)
+{
+ ResizeFrame(frame, cache, destSize);
+ PadFrame(cache, dest, destSize.m_Height - cache.rows, destSize.m_Width - cache.cols);
+}
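To make the resize-then-pad flow concrete: for a 1920x1080 frame and a 300x300 network input, the long edges give a resize factor of 300/1920 = 0.15625, so the frame scales to roughly 300x169 and PadFrame zero-fills the missing rows. A sketch, assuming od::Size takes (width, height) as ArmnnNetworkExecutor::GetImageAspectRatio() suggests:

    cv::Mat frame = cv::Mat::zeros(1080, 1920, CV_8UC3);   // stand-in input frame
    cv::Mat cache, processed;
    ResizeWithPad(frame, processed, cache, od::Size(300, 300));
    // cache is ~300x169 (aspect ratio preserved); processed is 300x300 with the
    // bottom ~131 rows zero-padded.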
diff --git a/samples/ObjectDetection/src/Main.cpp b/samples/ObjectDetection/src/Main.cpp
new file mode 100644
index 0000000000..10abb65cce
--- /dev/null
+++ b/samples/ObjectDetection/src/Main.cpp
@@ -0,0 +1,160 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "CvVideoFrameReader.hpp"
+#include "CvWindowOutput.hpp"
+#include "CvVideoFileWriter.hpp"
+#include "NetworkPipeline.hpp"
+#include "CmdArgsParser.hpp"
+
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <random>
+
+/*
+ * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector
+ */
+std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends)
+{
+ std::vector<armnn::BackendId> backends;
+ std::stringstream ss(preferredBackends);
+
+ while(ss.good())
+ {
+ std::string backend;
+ std::getline( ss, backend, ',' );
+ backends.emplace_back(backend);
+ }
+ return backends;
+}
+
+/*
+ * Assigns a color to each label in the label set
+ */
+std::vector<std::tuple<std::string, od::BBoxColor>> AssignColourToLabel(const std::string& pathToLabelFile)
+{
+ std::ifstream in(pathToLabelFile);
+ std::vector<std::tuple<std::string, od::BBoxColor>> labels;
+
+ std::string str;
+ std::default_random_engine generator;
+ std::uniform_int_distribution<int> distribution(0,255);
+
+ while (std::getline(in, str))
+ {
+ if(!str.empty())
+ {
+ od::BBoxColor c{
+ .colorCode = std::make_tuple(distribution(generator),
+ distribution(generator),
+ distribution(generator))
+ };
+ auto bboxInfo = std::make_tuple (str, c);
+
+ labels.emplace_back(bboxInfo);
+ }
+ }
+ return labels;
+}
+
+std::tuple<std::unique_ptr<od::IFrameReader<cv::Mat>>,
+ std::unique_ptr<od::IFrameOutput<cv::Mat>>>
+ GetFrameSourceAndSink(const std::map<std::string, std::string>& options) {
+
+ std::unique_ptr<od::IFrameReader<cv::Mat>> readerPtr;
+
+ std::unique_ptr<od::CvVideoFrameReader> reader = std::make_unique<od::CvVideoFrameReader>();
+ reader->Init(GetSpecifiedOption(options, VIDEO_FILE_PATH));
+
+ auto enc = reader->GetSourceEncodingInt();
+ auto fps = reader->GetSourceFps();
+ auto w = reader->GetSourceWidth();
+ auto h = reader->GetSourceHeight();
+ if (!reader->ConvertToRGB())
+ {
+ readerPtr = std::make_unique<od::CvVideoFrameReaderRgbWrapper>(std::move(reader));
+ }
+ else
+ {
+ readerPtr = std::move(reader);
+ }
+
+ if(CheckOptionSpecified(options, OUTPUT_VIDEO_FILE_PATH))
+ {
+ std::string outputVideo = GetSpecifiedOption(options, OUTPUT_VIDEO_FILE_PATH);
+ auto writer = std::make_unique<od::CvVideoFileWriter>();
+ writer->Init(outputVideo, enc, fps, w, h);
+
+ return std::make_tuple<>(std::move(readerPtr), std::move(writer));
+ }
+ else
+ {
+ auto writer = std::make_unique<od::CvWindowOutput>();
+ writer->Init("Processed Video");
+ return std::make_tuple<>(std::move(readerPtr), std::move(writer));
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ std::map<std::string, std::string> options;
+
+ int result = ParseOptions(options, CMD_OPTIONS, argv, argc);
+ if (result != 0)
+ {
+ return result;
+ }
+
+ // Create the network options
+ od::ODPipelineOptions pipelineOptions;
+ pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH);
+ pipelineOptions.m_ModelName = GetSpecifiedOption(options, MODEL_NAME);
+
+ if(CheckOptionSpecified(options, PREFERRED_BACKENDS))
+ {
+ pipelineOptions.m_backends = GetPreferredBackendList((GetSpecifiedOption(options, PREFERRED_BACKENDS)));
+ }
+ else
+ {
+ pipelineOptions.m_backends = {"CpuAcc", "CpuRef"};
+ }
+
+ auto labels = AssignColourToLabel(GetSpecifiedOption(options, LABEL_PATH));
+
+ od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(pipelineOptions);
+
+ auto inputAndOutput = GetFrameSourceAndSink(options);
+ std::unique_ptr<od::IFrameReader<cv::Mat>> reader = std::move(std::get<0>(inputAndOutput));
+ std::unique_ptr<od::IFrameOutput<cv::Mat>> sink = std::move(std::get<1>(inputAndOutput));
+
+ if (!sink->IsReady())
+ {
+ std::cerr << "Failed to open video writer.";
+ return 1;
+ }
+
+ od::InferenceResults results;
+
+ std::shared_ptr<cv::Mat> frame = reader->ReadFrame();
+
+ // Pre-allocate frames
+ cv::Mat processed;
+
+ while(!reader->IsExhausted(frame))
+ {
+ objectDetectionPipeline->PreProcessing(*frame, processed);
+ objectDetectionPipeline->Inference(processed, results);
+ objectDetectionPipeline->PostProcessing(results,
+ [&frame, &labels](od::DetectedObjects detects) -> void {
+ AddInferenceOutputToFrame(detects, *frame, labels);
+ });
+
+ sink->WriteFrame(frame);
+ frame = reader->ReadFrame();
+ }
+ sink->Close();
+ return 0;
+}
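For instance, GetPreferredBackendList above turns the comma-separated backend option into the ordered vector handed to Optimize():

    auto backends = GetPreferredBackendList("GpuAcc,CpuAcc,CpuRef");
    // backends == {"GpuAcc", "CpuAcc", "CpuRef"}, tried in that order; when the
    // option is absent, main() falls back to {"CpuAcc", "CpuRef"}.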
diff --git a/samples/ObjectDetection/src/NetworkPipeline.cpp b/samples/ObjectDetection/src/NetworkPipeline.cpp
new file mode 100644
index 0000000000..7f05882fc4
--- /dev/null
+++ b/samples/ObjectDetection/src/NetworkPipeline.cpp
@@ -0,0 +1,102 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NetworkPipeline.hpp"
+#include "ImageUtils.hpp"
+
+namespace od
+{
+
+ObjDetectionPipeline::ObjDetectionPipeline(std::unique_ptr<ArmnnNetworkExecutor> executor,
+ std::unique_ptr<IDetectionResultDecoder> decoder) :
+ m_executor(std::move(executor)),
+ m_decoder(std::move(decoder)){}
+
+void od::ObjDetectionPipeline::Inference(const cv::Mat& processed, InferenceResults& result)
+{
+ m_executor->Run(processed.data, processed.total() * processed.elemSize(), result);
+}
+
+void ObjDetectionPipeline::PostProcessing(InferenceResults& inferenceResult,
+ const std::function<void (DetectedObjects)>& callback)
+{
+ DetectedObjects detections = m_decoder->Decode(inferenceResult, m_inputImageSize,
+ m_executor->GetImageAspectRatio(), {});
+ if (callback)
+ {
+ callback(detections);
+ }
+}
+
+void ObjDetectionPipeline::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+ m_inputImageSize.m_Height = frame.rows;
+ m_inputImageSize.m_Width = frame.cols;
+ ResizeWithPad(frame, processed, m_processedFrame, m_executor->GetImageAspectRatio());
+}
+
+MobileNetSSDv1::MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor,
+ float objectThreshold) :
+ ObjDetectionPipeline(std::move(executor),
+ std::make_unique<SSDResultDecoder>(objectThreshold))
+{}
+
+void MobileNetSSDv1::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+ ObjDetectionPipeline::PreProcessing(frame, processed);
+ if (m_executor->GetInputDataType() == armnn::DataType::Float32)
+ {
+ // [0, 255] => [-1.0, 1.0]
+ processed.convertTo(processed, CV_32FC3, 1 / 127.5, -1);
+ }
+}
+
+YoloV3Tiny::YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor,
+ float NMSThreshold, float ClsThreshold, float ObjectThreshold) :
+ ObjDetectionPipeline(std::move(executor),
+ std::make_unique<YoloResultDecoder>(NMSThreshold,
+ ClsThreshold,
+ ObjectThreshold))
+{}
+
+void YoloV3Tiny::PreProcessing(const cv::Mat& frame, cv::Mat& processed)
+{
+ ObjDetectionPipeline::PreProcessing(frame, processed);
+ if (m_executor->GetInputDataType() == armnn::DataType::Float32)
+ {
+ processed.convertTo(processed, CV_32FC3);
+ }
+}
+
+IPipelinePtr CreatePipeline(od::ODPipelineOptions& config)
+{
+ auto executor = std::make_unique<od::ArmnnNetworkExecutor>(config.m_ModelFilePath, config.m_backends);
+
+ if (config.m_ModelName == "SSD_MOBILE")
+ {
+ float detectionThreshold = 0.6f;
+
+ return std::make_unique<od::MobileNetSSDv1>(std::move(executor),
+ detectionThreshold
+ );
+ }
+ else if (config.m_ModelName == "YOLO_V3_TINY")
+ {
+ float NMSThreshold = 0.6f;
+ float ClsThreshold = 0.6f;
+ float ObjectThreshold = 0.6f;
+ return std::make_unique<od::YoloV3Tiny>(std::move(executor),
+ NMSThreshold,
+ ClsThreshold,
+ ObjectThreshold
+ );
+ }
+ else
+ {
+ throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " supplied by user.");
+ }
+
+}
+}// namespace od
\ No newline at end of file
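A quick check of the MobileNetSSDv1 input scaling above, which computes x / 127.5 - 1 per channel:

    cv::Mat px(1, 1, CV_8UC3, cv::Scalar(0, 128, 255));
    px.convertTo(px, CV_32FC3, 1 / 127.5, -1);
    // channels become -1.0, ~0.004 and 1.0: the [0, 255] byte range lands in
    // [-1.0, 1.0], as the comment in MobileNetSSDv1::PreProcessing states.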
diff --git a/samples/ObjectDetection/src/NonMaxSuppression.cpp b/samples/ObjectDetection/src/NonMaxSuppression.cpp
new file mode 100644
index 0000000000..7bcd9045a5
--- /dev/null
+++ b/samples/ObjectDetection/src/NonMaxSuppression.cpp
@@ -0,0 +1,92 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "NonMaxSuppression.hpp"
+
+#include <algorithm>
+
+namespace od
+{
+
+static std::vector<unsigned int> GenerateRangeK(unsigned int k)
+{
+ std::vector<unsigned int> range(k);
+ std::iota(range.begin(), range.end(), 0);
+ return range;
+}
+
+
+/**
+* @brief Returns the intersection over union for two bounding boxes
+*
+* @param[in] detect1 First detection containing a bounding box.
+* @param[in] detect2 Second detection containing a bounding box.
+* @return Calculated intersection over union.
+*
+*/
+static double IntersectionOverUnion(DetectedObject& detect1, DetectedObject& detect2)
+{
+ uint32_t area1 = (detect1.GetBoundingBox().GetHeight() * detect1.GetBoundingBox().GetWidth());
+ uint32_t area2 = (detect2.GetBoundingBox().GetHeight() * detect2.GetBoundingBox().GetWidth());
+
+ float yMinIntersection = std::max(detect1.GetBoundingBox().GetY(), detect2.GetBoundingBox().GetY());
+ float xMinIntersection = std::max(detect1.GetBoundingBox().GetX(), detect2.GetBoundingBox().GetX());
+
+ float yMaxIntersection = std::min(detect1.GetBoundingBox().GetY() + detect1.GetBoundingBox().GetHeight(),
+ detect2.GetBoundingBox().GetY() + detect2.GetBoundingBox().GetHeight());
+ float xMaxIntersection = std::min(detect1.GetBoundingBox().GetX() + detect1.GetBoundingBox().GetWidth(),
+ detect2.GetBoundingBox().GetX() + detect2.GetBoundingBox().GetWidth());
+
+ double areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) *
+ std::max(xMaxIntersection - xMinIntersection, 0.0f);
+ double areaUnion = area1 + area2 - areaIntersection;
+
+ return areaIntersection / areaUnion;
+}
+
+std::vector<int> NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh)
+{
+ // Sort indices of detections from highest score to lowest.
+ std::vector<unsigned int> sortedIndicies = GenerateRangeK(inputDetections.size());
+ std::sort(sortedIndicies.begin(), sortedIndicies.end(),
+ [&inputDetections](int idx1, int idx2)
+ {
+ return inputDetections[idx1].GetScore() > inputDetections[idx2].GetScore();
+ });
+
+ std::vector<bool> visited(inputDetections.size(), false);
+ std::vector<int> outputIndiciesAfterNMS;
+
+ for (int i=0; i < inputDetections.size(); ++i)
+ {
+ // Each new unvisited detect should be kept.
+ if (!visited[sortedIndicies[i]])
+ {
+ outputIndiciesAfterNMS.emplace_back(sortedIndicies[i]);
+ visited[sortedIndicies[i]] = true;
+ }
+
+ // Look for detections to suppress.
+ for (int j=i+1; j<inputDetections.size(); ++j)
+ {
+ // Skip if already kept or suppressed.
+ if (!visited[sortedIndicies[j]])
+ {
+ // Detects must have the same label to be suppressed.
+ if (inputDetections[sortedIndicies[j]].GetLabel() == inputDetections[sortedIndicies[i]].GetLabel())
+ {
+ auto iou = IntersectionOverUnion(inputDetections[sortedIndicies[i]],
+ inputDetections[sortedIndicies[j]]);
+ if (iou > iouThresh)
+ {
+ visited[sortedIndicies[j]] = true;
+ }
+ }
+ }
+ }
+ }
+ return outputIndiciesAfterNMS;
+}
+
+} // namespace od
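A hand-worked value for IntersectionOverUnion above: two 100x100 boxes offset by (50, 50) intersect in a 50x50 region, so IoU = 2500 / (10000 + 10000 - 2500) ≈ 0.143. A sketch, assuming od::DetectedObjects is a std::vector as its use above suggests:

    od::DetectedObject a(0, "person", od::BoundingBox(0, 0, 100, 100), 0.9f);
    od::DetectedObject b(0, "person", od::BoundingBox(50, 50, 100, 100), 0.8f);
    od::DetectedObjects detections{a, b};
    auto kept = od::NonMaxSuppression(detections, 0.6f);
    // kept == {0, 1}: the IoU of ~0.143 is below the 0.6 threshold, so neither
    // detection suppresses the other.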
diff --git a/samples/ObjectDetection/src/SSDResultDecoder.cpp b/samples/ObjectDetection/src/SSDResultDecoder.cpp
new file mode 100644
index 0000000000..a3319212e5
--- /dev/null
+++ b/samples/ObjectDetection/src/SSDResultDecoder.cpp
@@ -0,0 +1,80 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "SSDResultDecoder.hpp"
+
+#include <cassert>
+#include <algorithm>
+#include <cmath>
+#include <stdexcept>
+namespace od
+{
+
+DetectedObjects SSDResultDecoder::Decode(const InferenceResults& networkResults,
+ const Size& outputFrameSize,
+ const Size& resizedFrameSize,
+ const std::vector<std::string>& labels)
+{
+ // SSD network outputs 4 tensors: bounding boxes, labels, probabilities, number of detections.
+ if (networkResults.size() != 4)
+ {
+ throw std::runtime_error("Number of outputs from SSD model doesn't equal 4");
+ }
+
+ DetectedObjects detectedObjects;
+ const int numDetections = static_cast<int>(std::lround(networkResults[3][0]));
+
+ double longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height);
+ double longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height);
+ const double resizeFactor = longEdgeOutput/longEdgeInput;
+
+ for (int i=0; i<numDetections; ++i)
+ {
+ if (networkResults[2][i] > m_objectThreshold)
+ {
+ DetectedObject detectedObject;
+ detectedObject.SetScore(networkResults[2][i]);
+ auto classId = std::lround(networkResults[1][i]);
+
+ if (classId < labels.size())
+ {
+ detectedObject.SetLabel(labels[classId]);
+ }
+ else
+ {
+ detectedObject.SetLabel(std::to_string(classId));
+ }
+ detectedObject.SetId(classId);
+
+ // Convert SSD bbox outputs (ratios of image size) to pixel values.
+ double topLeftY = networkResults[0][i*4 + 0] * resizedFrameSize.m_Height;
+ double topLeftX = networkResults[0][i*4 + 1] * resizedFrameSize.m_Width;
+ double botRightY = networkResults[0][i*4 + 2] * resizedFrameSize.m_Height;
+ double botRightX = networkResults[0][i*4 + 3] * resizedFrameSize.m_Width;
+
+ // Scale the coordinates to output frame size.
+ topLeftY *= resizeFactor;
+ topLeftX *= resizeFactor;
+ botRightY *= resizeFactor;
+ botRightX *= resizeFactor;
+
+ assert(botRightX > topLeftX);
+ assert(botRightY > topLeftY);
+
+ // Internal BoundingBox stores box top left x,y and width, height.
+ detectedObject.SetBoundingBox({static_cast<int>(std::round(topLeftX)),
+ static_cast<int>(std::round(topLeftY)),
+ static_cast<unsigned int>(botRightX - topLeftX),
+ static_cast<unsigned int>(botRightY - topLeftY)});
+
+ detectedObjects.emplace_back(detectedObject);
+ }
+ }
+ return detectedObjects;
+}
+
+SSDResultDecoder::SSDResultDecoder(float ObjectThreshold) : m_objectThreshold(ObjectThreshold) {}
+
+}// namespace od
\ No newline at end of file
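To illustrate the coordinate math in Decode above: with a 300x300 resizedFrameSize and a 1920x1080 outputFrameSize, resizeFactor = 1920/300 = 6.4, and a detection reported as ratios (yMin, xMin, yMax, xMax) = (0.1, 0.2, 0.5, 0.6) works out by hand as:

    // topLeftY  = 0.1 * 300 * 6.4 = 192     topLeftX  = 0.2 * 300 * 6.4 = 384
    // botRightY = 0.5 * 300 * 6.4 = 960     botRightX = 0.6 * 300 * 6.4 = 1152
    // => BoundingBox{x=384, y=192, width=768, height=768} in output-frame pixels.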
diff --git a/samples/ObjectDetection/src/YoloResultDecoder.cpp b/samples/ObjectDetection/src/YoloResultDecoder.cpp
new file mode 100644
index 0000000000..ffbf7cb68d
--- /dev/null
+++ b/samples/ObjectDetection/src/YoloResultDecoder.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "YoloResultDecoder.hpp"
+
+#include "NonMaxSuppression.hpp"
+
+#include <cassert>
+#include <stdexcept>
+
+namespace od
+{
+
+DetectedObjects YoloResultDecoder::Decode(const InferenceResults& networkResults,
+ const Size& outputFrameSize,
+ const Size& resizedFrameSize,
+ const std::vector<std::string>& labels)
+{
+
+ // Yolo v3 network outputs 1 tensor
+ if (networkResults.size() != 1)
+ {
+ throw std::runtime_error("Number of outputs from Yolo model doesn't equal 1");
+ }
+ auto element_step = m_boxElements + m_confidenceElements + m_numClasses;
+
+ float longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height);
+ float longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height);
+ const float resizeFactor = longEdgeOutput/longEdgeInput;
+
+ DetectedObjects detectedObjects;
+ DetectedObjects resultsAfterNMS;
+
+ for (const InferenceResult& result : networkResults)
+ {
+ for (unsigned int i = 0; i < m_numBoxes; ++i)
+ {
+ const float* cur_box = &result[i * element_step];
+ // Objectness score
+ if (cur_box[4] > m_objectThreshold)
+ {
+ for (unsigned int classIndex = 0; classIndex < m_numClasses; ++classIndex)
+ {
+ const float class_prob = cur_box[4] * cur_box[5 + classIndex];
+
+ // class confidence
+
+ if (class_prob > m_ClsThreshold)
+ {
+ DetectedObject detectedObject;
+
+ detectedObject.SetScore(class_prob);
+
+ float topLeftX = cur_box[0] * resizeFactor;
+ float topLeftY = cur_box[1] * resizeFactor;
+ float botRightX = cur_box[2] * resizeFactor;
+ float botRightY = cur_box[3] * resizeFactor;
+
+ assert(botRightX > topLeftX);
+ assert(botRightY > topLeftY);
+
+ detectedObject.SetBoundingBox({static_cast<int>(topLeftX),
+ static_cast<int>(topLeftY),
+ static_cast<unsigned int>(botRightX-topLeftX),
+ static_cast<unsigned int>(botRightY-topLeftY)});
+ if(labels.size() > classIndex)
+ {
+ detectedObject.SetLabel(labels.at(classIndex));
+ }
+ else
+ {
+ detectedObject.SetLabel(std::to_string(classIndex));
+ }
+ detectedObject.SetId(classIndex);
+ detectedObjects.emplace_back(detectedObject);
+ }
+ }
+ }
+ }
+
+ std::vector<int> keepIndiciesAfterNMS = od::NonMaxSuppression(detectedObjects, m_NmsThreshold);
+
+ for (const int ind: keepIndiciesAfterNMS)
+ {
+ resultsAfterNMS.emplace_back(detectedObjects[ind]);
+ }
+ }
+
+ return resultsAfterNMS;
+}
+
+YoloResultDecoder::YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold)
+ : m_NmsThreshold(NMSThreshold), m_ClsThreshold(ClsThreshold), m_objectThreshold(ObjectThreshold) {}
+
+}// namespace od
+
+
+
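On the output layout Decode above walks: each of the m_numBoxes rows holds m_boxElements box coordinates, then m_confidenceElements objectness value(s), then m_numClasses class scores, and the reported confidence is objectness times class score. A sketch of reading one row, assuming a COCO-style 4 + 1 + 80 = 85 floats per row (the real member values live in YoloResultDecoder.hpp):

    float PersonScore(const float* row)      // row = &result[i * 85]
    {
        // row[0..3] are topLeftX, topLeftY, botRightX, botRightY in input pixels
        float objectness = row[4];
        return objectness * row[5 + 0];      // class 0 is "person" in COCO ordering
    }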