diff options
author | Éanna Ó Catháin <eanna.ocathain@arm.com> | 2020-09-14 17:36:49 +0100 |
---|---|---|
committer | Jim Flynn <jim.flynn@arm.com> | 2020-09-14 18:40:01 +0000 |
commit | 919c14ef132986aa1514b2070ce6d19b5579a6ab (patch) | |
tree | 5c281e02a083768f65871cb861ab9b32ac7d8767 /samples/ObjectDetection/src | |
parent | 589e3e81a86c83456580e112978bf7a0ed5f43ac (diff) | |
download | armnn-919c14ef132986aa1514b2070ce6d19b5579a6ab.tar.gz |
MLECO-929 Add Object Detection sample application using the public ArmNN C++ API
Change-Id: I14aa1b4b726212cffbefd6687203f93f936fa872
Signed-off-by: Éanna Ó Catháin <eanna.ocathain@arm.com>
Diffstat (limited to 'samples/ObjectDetection/src')
-rw-r--r-- | samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp | 140 | ||||
-rw-r--r-- | samples/ObjectDetection/src/BoundingBox.cpp | 116 | ||||
-rw-r--r-- | samples/ObjectDetection/src/CmdArgsParser.cpp | 70 | ||||
-rw-r--r-- | samples/ObjectDetection/src/CvVideoFileWriter.cpp | 38 | ||||
-rw-r--r-- | samples/ObjectDetection/src/CvVideoFrameReader.cpp | 98 | ||||
-rw-r--r-- | samples/ObjectDetection/src/CvWindowOutput.cpp | 33 | ||||
-rw-r--r-- | samples/ObjectDetection/src/DetectedObject.cpp | 65 | ||||
-rw-r--r-- | samples/ObjectDetection/src/ImageUtils.cpp | 126 | ||||
-rw-r--r-- | samples/ObjectDetection/src/Main.cpp | 160 | ||||
-rw-r--r-- | samples/ObjectDetection/src/NetworkPipeline.cpp | 102 | ||||
-rw-r--r-- | samples/ObjectDetection/src/NonMaxSuppression.cpp | 92 | ||||
-rw-r--r-- | samples/ObjectDetection/src/SSDResultDecoder.cpp | 80 | ||||
-rw-r--r-- | samples/ObjectDetection/src/YoloResultDecoder.cpp | 100 |
13 files changed, 1220 insertions, 0 deletions
diff --git a/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp new file mode 100644 index 0000000000..cb4c0c9f84 --- /dev/null +++ b/samples/ObjectDetection/src/ArmnnNetworkExecutor.cpp @@ -0,0 +1,140 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ArmnnNetworkExecutor.hpp" +#include "Types.hpp" + +#include <random> +#include <string> + +namespace od +{ + +armnn::DataType ArmnnNetworkExecutor::GetInputDataType() const +{ + return m_inputBindingInfo.second.GetDataType(); +} + +ArmnnNetworkExecutor::ArmnnNetworkExecutor(std::string& modelPath, + std::vector<armnn::BackendId>& preferredBackends) +: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions())) +{ + // Import the TensorFlow lite model. + armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create(); + armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str()); + + std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0); + + m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]); + + m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0); + + std::vector<armnn::BindingPointInfo> outputBindings; + for(const std::string& name : m_outputLayerNamesList) + { + m_outputBindingInfo.push_back(std::move(parser->GetNetworkOutputBindingInfo(0, name))); + } + + std::vector<std::string> errorMessages; + // optimize the network. + armnn::IOptimizedNetworkPtr optNet = Optimize(*network, + preferredBackends, + m_Runtime->GetDeviceSpec(), + armnn::OptimizerOptions(), + armnn::Optional<std::vector<std::string>&>(errorMessages)); + + if (!optNet) + { + const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"}; + ARMNN_LOG(error) << errorMessage; + throw armnn::Exception(errorMessage); + } + + // Load the optimized network onto the m_Runtime device + std::string errorMessage; + if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage)) + { + ARMNN_LOG(error) << errorMessage; + } + + //pre-allocate memory for output (the size of it never changes) + for (int it = 0; it < m_outputLayerNamesList.size(); ++it) + { + const armnn::DataType dataType = m_outputBindingInfo[it].second.GetDataType(); + const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape(); + + InferenceResult oneLayerOutResult; + switch (dataType) + { + case armnn::DataType::Float32: + { + oneLayerOutResult.resize(tensorShape.GetNumElements(), 0); + break; + } + default: + { + errorMessage = "ArmnnNetworkExecutor: unsupported output tensor data type"; + ARMNN_LOG(error) << errorMessage << " " << log_as_int(dataType); + throw armnn::Exception(errorMessage); + } + } + + m_OutputBuffer.emplace_back(oneLayerOutResult); + + // Make ArmNN output tensors + m_OutputTensors.reserve(m_OutputBuffer.size()); + for (size_t it = 0; it < m_OutputBuffer.size(); ++it) + { + m_OutputTensors.emplace_back(std::make_pair( + m_outputBindingInfo[it].first, + armnn::Tensor(m_outputBindingInfo[it].second, + m_OutputBuffer.at(it).data()) + )); + } + } + +} + +void ArmnnNetworkExecutor::PrepareTensors(const void* inputData, const size_t dataBytes) +{ + assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes); + m_InputTensors.clear(); + m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}}; +} + +bool ArmnnNetworkExecutor::Run(const void* inputData, const size_t dataBytes, InferenceResults& outResults) +{ + /* Prepare tensors if they are not ready */ + ARMNN_LOG(debug) << "Preparing tensors..."; + this->PrepareTensors(inputData, dataBytes); + ARMNN_LOG(trace) << "Running inference..."; + + armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors); + + std::stringstream inferenceFinished; + inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n"; + + ARMNN_LOG(trace) << inferenceFinished.str(); + + if (ret == armnn::Status::Failure) + { + ARMNN_LOG(error) << "Failed to perform inference."; + } + + outResults.reserve(m_outputLayerNamesList.size()); + outResults = m_OutputBuffer; + + return (armnn::Status::Success == ret); +} + +Size ArmnnNetworkExecutor::GetImageAspectRatio() +{ + const auto shape = m_inputBindingInfo.second.GetShape(); + assert(shape.GetNumDimensions() == 4); + armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC); + return Size(shape[nhwc.GetWidthIndex()], + shape[nhwc.GetHeightIndex()]); +} +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/BoundingBox.cpp b/samples/ObjectDetection/src/BoundingBox.cpp new file mode 100644 index 0000000000..c52b0fe58a --- /dev/null +++ b/samples/ObjectDetection/src/BoundingBox.cpp @@ -0,0 +1,116 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "BoundingBox.hpp" +#include <algorithm> +namespace od +{ + +BoundingBox::BoundingBox() : + BoundingBox(0, 0, 0u, 0u) {} + +BoundingBox::BoundingBox( + int x, + int y, + unsigned int width, + unsigned int height) : + m_X(x), + m_Y(y), + m_Width(width), + m_Height(height) {} + +BoundingBox::BoundingBox(const BoundingBox& other) : + m_X(other.m_X), + m_Y(other.m_Y), + m_Width(other.m_Width), + m_Height(other.m_Height) {} + +int BoundingBox::GetX() const { + return m_X; +} + +int BoundingBox::GetY() const { + return m_Y; +} + +unsigned int BoundingBox::GetWidth() const { + return m_Width; +} + +unsigned int BoundingBox::GetHeight() const { + return m_Height; +} + +void BoundingBox::SetX(int x) { + m_X = x; +} + +void BoundingBox::SetY(int y) { + m_Y = y; +} + +void BoundingBox::SetWidth(unsigned int width) { + m_Width = width; +} + +void BoundingBox::SetHeight(unsigned int height) { + m_Height = height; +} + +BoundingBox& BoundingBox::operator=(const BoundingBox& other) { + m_X = other.m_X; + m_Y = other.m_Y; + + m_Width = other.m_Width; + m_Height = other.m_Height; + + return *this; +} + +/* Helper function to get a "valid" bounding box */ +void GetValidBoundingBox(const BoundingBox& boxIn, BoundingBox& boxOut, + const BoundingBox& boxLimits) { + boxOut.SetX(std::max(boxIn.GetX(), boxLimits.GetX())); + boxOut.SetY(std::max(boxIn.GetY(), boxLimits.GetY())); + + /* If we have changed x and/or y, we compensate by reducing the height and/or width */ + int boxOutWidth = static_cast<int>(boxIn.GetWidth()) - + std::max(0, (boxOut.GetX() - boxIn.GetX())); + int boxOutHeight = static_cast<int>(boxIn.GetHeight()) - + std::max(0, (boxOut.GetY() - boxIn.GetY())); + + /* This suggests that there was no overlap on x or/and y axis */ + if (boxOutHeight <= 0 || boxOutWidth <= 0) + { + boxOut = BoundingBox{0, 0, 0, 0}; + return; + } + + const int limitBoxRightX = boxLimits.GetX() + static_cast<int>(boxLimits.GetWidth()); + const int limitBoxRightY = boxLimits.GetY() + static_cast<int>(boxLimits.GetHeight()); + const int boxRightX = boxOut.GetX() + boxOutWidth; + const int boxRightY = boxOut.GetY() + boxOutHeight; + + if (boxRightX > limitBoxRightX) + { + boxOutWidth -= (boxRightX - limitBoxRightX); + } + + if (boxRightY > limitBoxRightY) + { + boxOutHeight -= (boxRightY - limitBoxRightY); + } + + /* This suggests value has rolled over because of very high numbers, not handled for now */ + if (boxOutHeight <= 0 || boxOutWidth <= 0) + { + boxOut = BoundingBox{0, 0, 0, 0}; + return; + } + + boxOut.SetHeight(static_cast<unsigned int>(boxOutHeight)); + boxOut.SetWidth(static_cast<unsigned int>(boxOutWidth)); +} +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/CmdArgsParser.cpp b/samples/ObjectDetection/src/CmdArgsParser.cpp new file mode 100644 index 0000000000..b8c74bc10f --- /dev/null +++ b/samples/ObjectDetection/src/CmdArgsParser.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "CmdArgsParser.hpp" +#include <iostream> +/* + * Checks that a particular option was specified by the user + */ +bool CheckOptionSpecified(const std::map<std::string, std::string>& options, const std::string& option) +{ + auto it = options.find(option); + return it!=options.end(); +} + +/* + * Retrieves the user provided option + */ +std::string GetSpecifiedOption(const std::map<std::string, std::string>& options, const std::string& option) +{ + if (CheckOptionSpecified(options, option)){ + return options.at(option); + } + else + { + throw std::invalid_argument("Required option: " + option + " not defined."); + } +} + +/* + * Parses all the command line options provided by the user and stores in a map. + */ +int ParseOptions(std::map<std::string, std::string>& options, std::map<std::string, std::string>& acceptedOptions, + char *argv[], int argc) +{ + for (int i = 1; i < argc; ++i) + { + std::string currentOption = std::string(argv[i]); + auto it = acceptedOptions.find(currentOption); + if (it != acceptedOptions.end()) + { + if (i + 1 < argc && std::string(argv[i + 1]).rfind("--", 0) != 0) + { + std::string value = argv[++i]; + options.insert({it->first, value}); + } + else if (std::string(argv[i]) == HELP) + { + std::cout << "Available options" << std::endl; + for (auto & acceptedOption : acceptedOptions) + { + std::cout << acceptedOption.first << " : " << acceptedOption.second << std::endl; + } + return 2; + } + else + { + std::cerr << std::string(argv[i]) << " option requires one argument." << std::endl; + return 1; + } + } + else + { + std::cerr << "Unrecognised option: " << std::string(argv[i]) << std::endl; + return 1; + } + } + return 0; +} diff --git a/samples/ObjectDetection/src/CvVideoFileWriter.cpp b/samples/ObjectDetection/src/CvVideoFileWriter.cpp new file mode 100644 index 0000000000..ab80b95d49 --- /dev/null +++ b/samples/ObjectDetection/src/CvVideoFileWriter.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "CvVideoFileWriter.hpp" + +namespace od +{ + +void CvVideoFileWriter::Init(const std::string& outputVideo, int encoding, double fps, int width, int height) +{ + m_ready = m_cvWriter.open(outputVideo, cv::CAP_FFMPEG, + encoding, + fps, + cv::Size(width, height), true); +} + + +void CvVideoFileWriter::WriteFrame(std::shared_ptr<cv::Mat>& frame) +{ + if(m_cvWriter.isOpened()) + { + cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR); + m_cvWriter.write(*frame); + } +} + +bool CvVideoFileWriter::IsReady() const +{ + return m_ready; +} + +void CvVideoFileWriter::Close() +{ + m_cvWriter.release(); +} +}// namespace od diff --git a/samples/ObjectDetection/src/CvVideoFrameReader.cpp b/samples/ObjectDetection/src/CvVideoFrameReader.cpp new file mode 100644 index 0000000000..09b5050973 --- /dev/null +++ b/samples/ObjectDetection/src/CvVideoFrameReader.cpp @@ -0,0 +1,98 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + + +#include "CvVideoFrameReader.hpp" + +namespace od +{ + +std::shared_ptr<cv::Mat> CvVideoFrameReader::ReadFrame() +{ + // opencv copies data anyway + cv::Mat captureFrame; + m_capture.read(captureFrame); + return std::make_shared<cv::Mat>(std::move(captureFrame)); +} + +bool CvVideoFrameReader::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const +{ + assert(frame!=nullptr); + return frame->empty(); +} + +void CvVideoFrameReader::CheckIsOpen(const std::string& source) +{ + if (!m_capture.isOpened()) + { + throw std::runtime_error("Failed to open video capture for the source = " + source); + } +} + +void CvVideoFrameReader::Init(const std::string& source) +{ + m_capture.open(source); + CheckIsOpen(source); +} + +int CvVideoFrameReader::GetSourceWidth() const +{ + return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_WIDTH))); +} + +int CvVideoFrameReader::GetSourceHeight() const +{ + return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_HEIGHT))); +} + +double CvVideoFrameReader::GetSourceFps() const +{ + return m_capture.get(cv::CAP_PROP_FPS); +} + +bool CvVideoFrameReader::ConvertToRGB() +{ + m_capture.set(cv::CAP_PROP_CONVERT_RGB, 1.0); + return static_cast<bool>(m_capture.get(cv::CAP_PROP_CONVERT_RGB)); +} + +std::string CvVideoFrameReader::GetSourceEncoding() const +{ + char fourccStr[5]; + auto fourcc = (int)m_capture.get(cv::CAP_PROP_FOURCC); + sprintf(fourccStr,"%c%c%c%c",fourcc & 0xFF, (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, (fourcc >> 24) & 0xFF); + return fourccStr; +} + +int CvVideoFrameReader::GetSourceEncodingInt() const +{ + return (int)m_capture.get(cv::CAP_PROP_FOURCC); +} + +int CvVideoFrameReader::GetFrameCount() const +{ + return static_cast<int>(lround(m_capture.get(cv::CAP_PROP_FRAME_COUNT))); +}; + +std::shared_ptr<cv::Mat> CvVideoFrameReaderRgbWrapper::ReadFrame() +{ + auto framePtr = m_reader->ReadFrame(); + if (!IsExhausted(framePtr)) + { + cv::cvtColor(*framePtr, *framePtr, cv::COLOR_BGR2RGB); + } + return framePtr; +} + +bool CvVideoFrameReaderRgbWrapper::IsExhausted(const std::shared_ptr<cv::Mat>& frame) const +{ + return m_reader->IsExhausted(frame); +} + +CvVideoFrameReaderRgbWrapper::CvVideoFrameReaderRgbWrapper(std::unique_ptr<od::CvVideoFrameReader> reader): + m_reader(std::move(reader)) +{} + +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/CvWindowOutput.cpp b/samples/ObjectDetection/src/CvWindowOutput.cpp new file mode 100644 index 0000000000..a32147b19a --- /dev/null +++ b/samples/ObjectDetection/src/CvWindowOutput.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "CvWindowOutput.hpp" + +namespace od +{ + +void CvWindowOutput::Init(const std::string& windowName) +{ + m_windowName = windowName; + cv::namedWindow(m_windowName, cv::WINDOW_AUTOSIZE); +} + +void CvWindowOutput::WriteFrame(std::shared_ptr<cv::Mat>& frame) +{ + cv::cvtColor(*frame, *frame, cv::COLOR_RGB2BGR); + cv::imshow( m_windowName, *frame); + cv::waitKey(30); +} + +void CvWindowOutput::Close() +{ + cv::destroyWindow(m_windowName); +} + +bool CvWindowOutput::IsReady() const +{ + return true; +} +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/DetectedObject.cpp b/samples/ObjectDetection/src/DetectedObject.cpp new file mode 100644 index 0000000000..95f99a07d6 --- /dev/null +++ b/samples/ObjectDetection/src/DetectedObject.cpp @@ -0,0 +1,65 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "DetectedObject.hpp" + +namespace od +{ + +DetectedObject::DetectedObject() : + DetectedObject(0u, "", BoundingBox(), 0u) +{} + +DetectedObject::DetectedObject( + unsigned int id, + std::string label, + const BoundingBox &boundingBox, + float score) : + m_Id(id), + m_Label(std::move(label)), + m_BoundingBox(boundingBox), + m_Score(score) +{} + +unsigned int DetectedObject::GetId() const +{ + return m_Id; +} + +const std::string &DetectedObject::GetLabel() const +{ + return m_Label; +} + +const BoundingBox &DetectedObject::GetBoundingBox() const +{ + return m_BoundingBox; +} + +float DetectedObject::GetScore() const +{ + return m_Score; +} + +void DetectedObject::SetId(unsigned int id) +{ + m_Id = id; +} + +void DetectedObject::SetLabel(const std::string &label) +{ + m_Label = label; +} + +void DetectedObject::SetBoundingBox(const BoundingBox &boundingBox) +{ + m_BoundingBox = boundingBox; +} + +void DetectedObject::SetScore(float score) +{ + m_Score = score; +} +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/ImageUtils.cpp b/samples/ObjectDetection/src/ImageUtils.cpp new file mode 100644 index 0000000000..9a3ed17b63 --- /dev/null +++ b/samples/ObjectDetection/src/ImageUtils.cpp @@ -0,0 +1,126 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ImageUtils.hpp" +#include "BoundingBox.hpp" +#include "Types.hpp" + +#include <armnn/Logging.hpp> + +static cv::Scalar GetScalarColorCode(std::tuple<int, int, int> color) +{ + return cv::Scalar(std::get<0>(color), std::get<1>(color), std::get<2>(color)); +} + +void AddInferenceOutputToFrame(od::DetectedObjects& decodedResults, cv::Mat& inputFrame, + std::vector<std::tuple<std::string, od::BBoxColor>>& labels) +{ + for(const od::DetectedObject& object : decodedResults) + { + int confidence = static_cast<int>(object.GetScore() * 100); + int baseline = 0; + std::string textStr; + std::tuple<int, int, int> colorCode(255, 0, 0); //red + + if (labels.size() > object.GetId()) + { + auto label = labels[object.GetId()]; + textStr = std::get<0>(label) + " - " + std::to_string(confidence) + "%"; + colorCode = std::get<1>(label).colorCode; + } + else + { + textStr = std::to_string(object.GetId()) + " - " + std::to_string(confidence) + "%"; + } + + cv::Size textSize = getTextSize(textStr, cv::FONT_HERSHEY_DUPLEX, 1.0, 1, &baseline); + + const od::BoundingBox& bbox = object.GetBoundingBox(); + + if (bbox.GetX() + bbox.GetWidth() > inputFrame.cols) + { + cv::Rect r(bbox.GetX(), bbox.GetY(), inputFrame.cols - bbox.GetX(), bbox.GetHeight()); + + cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0); + } + else if (bbox.GetY() + bbox.GetHeight() > inputFrame.rows) + { + cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), inputFrame.rows - bbox.GetY()); + + cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0); + } + else + { + cv::Rect r(bbox.GetX(), bbox.GetY(), bbox.GetWidth(), bbox.GetHeight()); + + cv::rectangle(inputFrame, r, GetScalarColorCode(colorCode), 2, 8, 0); + } + + int textBoxY = std::max(0 ,bbox.GetY() - textSize.height); + + cv::Rect text(bbox.GetX(), textBoxY, textSize.width, textSize.height); + + cv::rectangle(inputFrame, text, GetScalarColorCode(colorCode), -1); + + cv::Scalar color; + + if(std::get<0>(colorCode) + std::get<1>(colorCode) + std::get<2>(colorCode) > 127) + { + color = cv::Scalar::all(0); + } + else + { + color = cv::Scalar::all(255); + } + + cv::putText(inputFrame, + textStr , + cv::Point(bbox.GetX(), textBoxY + textSize.height -(textSize.height)/3), + cv::FONT_HERSHEY_DUPLEX, + 0.5, + color, + 1); + } +} + + +void ResizeFrame(const cv::Mat& frame, cv::Mat& dest, const od::Size& aspectRatio) +{ + if(&dest != &frame) + { + double longEdgeInput = std::max(frame.rows, frame.cols); + double longEdgeOutput = std::max(aspectRatio.m_Width, aspectRatio.m_Height); + const double resizeFactor = longEdgeOutput/longEdgeInput; + cv::resize(frame, dest, cv::Size(0, 0), resizeFactor, resizeFactor, DefaultResizeFlag); + } + else + { + const std::string warningMessage{"Resize was not performed because resized frame references the source frame."}; + ARMNN_LOG(warning) << warningMessage; + } +} + +/** Pad a frame with zeros (add rows and columns to the end) */ +void PadFrame(const cv::Mat& src, cv::Mat& dest, const int bottom, const int right) +{ + if(&dest != &src) + { + cv::copyMakeBorder(src, dest, 0, bottom, 0, right, cv::BORDER_CONSTANT); + } + else + { + const std::string warningMessage + { + "Pad was not performed because destination frame references the source frame." + }; + ARMNN_LOG(warning) << warningMessage; + } +} + +void ResizeWithPad(const cv::Mat& frame, cv::Mat& dest, cv::Mat& cache, const od::Size& destSize) +{ + ResizeFrame(frame, cache, destSize); + PadFrame(cache, dest,destSize.m_Height - cache.rows,destSize.m_Width - cache.cols); +} diff --git a/samples/ObjectDetection/src/Main.cpp b/samples/ObjectDetection/src/Main.cpp new file mode 100644 index 0000000000..10abb65cce --- /dev/null +++ b/samples/ObjectDetection/src/Main.cpp @@ -0,0 +1,160 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "CvVideoFrameReader.hpp" +#include "CvWindowOutput.hpp" +#include "CvVideoFileWriter.hpp" +#include "NetworkPipeline.hpp" +#include "CmdArgsParser.hpp" + +#include <fstream> +#include <iostream> +#include <map> +#include <random> + +/* + * Reads the user supplied backend preference, splits it by comma, and returns an ordered vector + */ +std::vector<armnn::BackendId> GetPreferredBackendList(const std::string& preferredBackends) +{ + std::vector<armnn::BackendId> backends; + std::stringstream ss(preferredBackends); + + while(ss.good()) + { + std::string backend; + std::getline( ss, backend, ',' ); + backends.emplace_back(backend); + } + return backends; +} + +/* + * Assigns a color to each label in the label set + */ +std::vector<std::tuple<std::string, od::BBoxColor>> AssignColourToLabel(const std::string& pathToLabelFile) +{ + std::ifstream in(pathToLabelFile); + std::vector<std::tuple<std::string, od::BBoxColor>> labels; + + std::string str; + std::default_random_engine generator; + std::uniform_int_distribution<int> distribution(0,255); + + while (std::getline(in, str)) + { + if(!str.empty()) + { + od::BBoxColor c{ + .colorCode = std::make_tuple(distribution(generator), + distribution(generator), + distribution(generator)) + }; + auto bboxInfo = std::make_tuple (str, c); + + labels.emplace_back(bboxInfo); + } + } + return labels; +} + +std::tuple<std::unique_ptr<od::IFrameReader<cv::Mat>>, + std::unique_ptr<od::IFrameOutput<cv::Mat>>> + GetFrameSourceAndSink(const std::map<std::string, std::string>& options) { + + std::unique_ptr<od::IFrameReader<cv::Mat>> readerPtr; + + std::unique_ptr<od::CvVideoFrameReader> reader = std::make_unique<od::CvVideoFrameReader>(); + reader->Init(GetSpecifiedOption(options, VIDEO_FILE_PATH)); + + auto enc = reader->GetSourceEncodingInt(); + auto fps = reader->GetSourceFps(); + auto w = reader->GetSourceWidth(); + auto h = reader->GetSourceHeight(); + if (!reader->ConvertToRGB()) + { + readerPtr = std::move(std::make_unique<od::CvVideoFrameReaderRgbWrapper>(std::move(reader))); + } + else + { + readerPtr = std::move(reader); + } + + if(CheckOptionSpecified(options, OUTPUT_VIDEO_FILE_PATH)) + { + std::string outputVideo = GetSpecifiedOption(options, OUTPUT_VIDEO_FILE_PATH); + auto writer = std::make_unique<od::CvVideoFileWriter>(); + writer->Init(outputVideo, enc, fps, w, h); + + return std::make_tuple<>(std::move(readerPtr), std::move(writer)); + } + else + { + auto writer = std::make_unique<od::CvWindowOutput>(); + writer->Init("Processed Video"); + return std::make_tuple<>(std::move(readerPtr), std::move(writer)); + } +} + +int main(int argc, char *argv[]) +{ + std::map<std::string, std::string> options; + + int result = ParseOptions(options, CMD_OPTIONS, argv, argc); + if (result != 0) + { + return result; + } + + // Create the network options + od::ODPipelineOptions pipelineOptions; + pipelineOptions.m_ModelFilePath = GetSpecifiedOption(options, MODEL_FILE_PATH); + pipelineOptions.m_ModelName = GetSpecifiedOption(options, MODEL_NAME); + + if(CheckOptionSpecified(options, PREFERRED_BACKENDS)) + { + pipelineOptions.m_backends = GetPreferredBackendList((GetSpecifiedOption(options, PREFERRED_BACKENDS))); + } + else + { + pipelineOptions.m_backends = {"CpuAcc", "CpuRef"}; + } + + auto labels = AssignColourToLabel(GetSpecifiedOption(options, LABEL_PATH)); + + od::IPipelinePtr objectDetectionPipeline = od::CreatePipeline(pipelineOptions); + + auto inputAndOutput = GetFrameSourceAndSink(options); + std::unique_ptr<od::IFrameReader<cv::Mat>> reader = std::move(std::get<0>(inputAndOutput)); + std::unique_ptr<od::IFrameOutput<cv::Mat>> sink = std::move(std::get<1>(inputAndOutput)); + + if (!sink->IsReady()) + { + std::cerr << "Failed to open video writer."; + return 1; + } + + od::InferenceResults results; + + std::shared_ptr<cv::Mat> frame = reader->ReadFrame(); + + //pre-allocate frames + cv::Mat processed; + + while(!reader->IsExhausted(frame)) + { + objectDetectionPipeline->PreProcessing(*frame, processed); + objectDetectionPipeline->Inference(processed, results); + objectDetectionPipeline->PostProcessing(results, + [&frame, &labels](od::DetectedObjects detects) -> void { + AddInferenceOutputToFrame(detects, *frame, labels); + }); + + sink->WriteFrame(frame); + frame = reader->ReadFrame(); + } + sink->Close(); + return 0; +} diff --git a/samples/ObjectDetection/src/NetworkPipeline.cpp b/samples/ObjectDetection/src/NetworkPipeline.cpp new file mode 100644 index 0000000000..7f05882fc4 --- /dev/null +++ b/samples/ObjectDetection/src/NetworkPipeline.cpp @@ -0,0 +1,102 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NetworkPipeline.hpp" +#include "ImageUtils.hpp" + +namespace od +{ + +ObjDetectionPipeline::ObjDetectionPipeline(std::unique_ptr<ArmnnNetworkExecutor> executor, + std::unique_ptr<IDetectionResultDecoder> decoder) : + m_executor(std::move(executor)), + m_decoder(std::move(decoder)){} + +void od::ObjDetectionPipeline::Inference(const cv::Mat& processed, InferenceResults& result) +{ + m_executor->Run(processed.data, processed.total() * processed.elemSize(), result); +} + +void ObjDetectionPipeline::PostProcessing(InferenceResults& inferenceResult, + const std::function<void (DetectedObjects)>& callback) +{ + DetectedObjects detections = m_decoder->Decode(inferenceResult, m_inputImageSize, + m_executor->GetImageAspectRatio(), {}); + if (callback) + { + callback(detections); + } +} + +void ObjDetectionPipeline::PreProcessing(const cv::Mat& frame, cv::Mat& processed) +{ + m_inputImageSize.m_Height = frame.rows; + m_inputImageSize.m_Width = frame.cols; + ResizeWithPad(frame, processed, m_processedFrame, m_executor->GetImageAspectRatio()); +} + +MobileNetSSDv1::MobileNetSSDv1(std::unique_ptr<ArmnnNetworkExecutor> executor, + float objectThreshold) : + ObjDetectionPipeline(std::move(executor), + std::make_unique<SSDResultDecoder>(objectThreshold)) +{} + +void MobileNetSSDv1::PreProcessing(const cv::Mat& frame, cv::Mat& processed) +{ + ObjDetectionPipeline::PreProcessing(frame, processed); + if (m_executor->GetInputDataType() == armnn::DataType::Float32) + { + // [0, 255] => [-1.0, 1.0] + processed.convertTo(processed, CV_32FC3, 1 / 127.5, -1); + } +} + +YoloV3Tiny::YoloV3Tiny(std::unique_ptr<ArmnnNetworkExecutor> executor, + float NMSThreshold, float ClsThreshold, float ObjectThreshold) : + ObjDetectionPipeline(std::move(executor), + std::move(std::make_unique<YoloResultDecoder>(NMSThreshold, + ClsThreshold, + ObjectThreshold))) +{} + +void YoloV3Tiny::PreProcessing(const cv::Mat& frame, cv::Mat& processed) +{ + ObjDetectionPipeline::PreProcessing(frame, processed); + if (m_executor->GetInputDataType() == armnn::DataType::Float32) + { + processed.convertTo(processed, CV_32FC3); + } +} + +IPipelinePtr CreatePipeline(od::ODPipelineOptions& config) +{ + auto executor = std::make_unique<od::ArmnnNetworkExecutor>(config.m_ModelFilePath, config.m_backends); + + if (config.m_ModelName == "SSD_MOBILE") + { + float detectionThreshold = 0.6; + + return std::make_unique<od::MobileNetSSDv1>(std::move(executor), + detectionThreshold + ); + } + else if (config.m_ModelName == "YOLO_V3_TINY") + { + float NMSThreshold = 0.6f; + float ClsThreshold = 0.6f; + float ObjectThreshold = 0.6f; + return std::make_unique<od::YoloV3Tiny>(std::move(executor), + NMSThreshold, + ClsThreshold, + ObjectThreshold + ); + } + else + { + throw std::invalid_argument("Unknown Model name: " + config.m_ModelName + " supplied by user."); + } + +} +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/NonMaxSuppression.cpp b/samples/ObjectDetection/src/NonMaxSuppression.cpp new file mode 100644 index 0000000000..7bcd9045a5 --- /dev/null +++ b/samples/ObjectDetection/src/NonMaxSuppression.cpp @@ -0,0 +1,92 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "NonMaxSuppression.hpp" + +#include <algorithm> + +namespace od +{ + +static std::vector<unsigned int> GenerateRangeK(unsigned int k) +{ + std::vector<unsigned int> range(k); + std::iota(range.begin(), range.end(), 0); + return range; +} + + +/** +* @brief Returns the intersection over union for two bounding boxes +* +* @param[in] First detect containing bounding box. +* @param[in] Second detect containing bounding box. +* @return Calculated intersection over union. +* +*/ +static double IntersectionOverUnion(DetectedObject& detect1, DetectedObject& detect2) +{ + uint32_t area1 = (detect1.GetBoundingBox().GetHeight() * detect1.GetBoundingBox().GetWidth()); + uint32_t area2 = (detect2.GetBoundingBox().GetHeight() * detect2.GetBoundingBox().GetWidth()); + + float yMinIntersection = std::max(detect1.GetBoundingBox().GetY(), detect2.GetBoundingBox().GetY()); + float xMinIntersection = std::max(detect1.GetBoundingBox().GetX(), detect2.GetBoundingBox().GetX()); + + float yMaxIntersection = std::min(detect1.GetBoundingBox().GetY() + detect1.GetBoundingBox().GetHeight(), + detect2.GetBoundingBox().GetY() + detect2.GetBoundingBox().GetHeight()); + float xMaxIntersection = std::min(detect1.GetBoundingBox().GetX() + detect1.GetBoundingBox().GetWidth(), + detect2.GetBoundingBox().GetX() + detect2.GetBoundingBox().GetWidth()); + + double areaIntersection = std::max(yMaxIntersection - yMinIntersection, 0.0f) * + std::max(xMaxIntersection - xMinIntersection, 0.0f); + double areaUnion = area1 + area2 - areaIntersection; + + return areaIntersection / areaUnion; +} + +std::vector<int> NonMaxSuppression(DetectedObjects& inputDetections, float iouThresh) +{ + // Sort indicies of detections by highest score to lowest. + std::vector<unsigned int> sortedIndicies = GenerateRangeK(inputDetections.size()); + std::sort(sortedIndicies.begin(), sortedIndicies.end(), + [&inputDetections](int idx1, int idx2) + { + return inputDetections[idx1].GetScore() > inputDetections[idx2].GetScore(); + }); + + std::vector<bool> visited(inputDetections.size(), false); + std::vector<int> outputIndiciesAfterNMS; + + for (int i=0; i < inputDetections.size(); ++i) + { + // Each new unvisited detect should be kept. + if (!visited[sortedIndicies[i]]) + { + outputIndiciesAfterNMS.emplace_back(sortedIndicies[i]); + visited[sortedIndicies[i]] = true; + } + + // Look for detections to suppress. + for (int j=i+1; j<inputDetections.size(); ++j) + { + // Skip if already kept or suppressed. + if (!visited[sortedIndicies[j]]) + { + // Detects must have the same label to be suppressed. + if (inputDetections[sortedIndicies[j]].GetLabel() == inputDetections[sortedIndicies[i]].GetLabel()) + { + auto iou = IntersectionOverUnion(inputDetections[sortedIndicies[i]], + inputDetections[sortedIndicies[j]]); + if (iou > iouThresh) + { + visited[sortedIndicies[j]] = true; + } + } + } + } + } + return outputIndiciesAfterNMS; +} + +} // namespace od diff --git a/samples/ObjectDetection/src/SSDResultDecoder.cpp b/samples/ObjectDetection/src/SSDResultDecoder.cpp new file mode 100644 index 0000000000..a3319212e5 --- /dev/null +++ b/samples/ObjectDetection/src/SSDResultDecoder.cpp @@ -0,0 +1,80 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "SSDResultDecoder.hpp" + +#include <cassert> +#include <algorithm> +#include <cmath> +#include <stdexcept> +namespace od +{ + +DetectedObjects SSDResultDecoder::Decode(const InferenceResults& networkResults, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector<std::string>& labels) +{ + // SSD network outputs 4 tensors: bounding boxes, labels, probabilities, number of detections. + if (networkResults.size() != 4) + { + throw std::runtime_error("Number of outputs from SSD model doesn't equal 4"); + } + + DetectedObjects detectedObjects; + const int numDetections = static_cast<int>(std::lround(networkResults[3][0])); + + double longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height); + double longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height); + const double resizeFactor = longEdgeOutput/longEdgeInput; + + for (int i=0; i<numDetections; ++i) + { + if (networkResults[2][i] > m_objectThreshold) + { + DetectedObject detectedObject; + detectedObject.SetScore(networkResults[2][i]); + auto classId = std::lround(networkResults[1][i]); + + if (classId < labels.size()) + { + detectedObject.SetLabel(labels[classId]); + } + else + { + detectedObject.SetLabel(std::to_string(classId)); + } + detectedObject.SetId(classId); + + // Convert SSD bbox outputs (ratios of image size) to pixel values. + double topLeftY = networkResults[0][i*4 + 0] * resizedFrameSize.m_Height; + double topLeftX = networkResults[0][i*4 + 1] * resizedFrameSize.m_Width; + double botRightY = networkResults[0][i*4 + 2] * resizedFrameSize.m_Height; + double botRightX = networkResults[0][i*4 + 3] * resizedFrameSize.m_Width; + + // Scale the coordinates to output frame size. + topLeftY *= resizeFactor; + topLeftX *= resizeFactor; + botRightY *= resizeFactor; + botRightX *= resizeFactor; + + assert(botRightX > topLeftX); + assert(botRightY > topLeftY); + + // Internal BoundingBox stores box top left x,y and width, height. + detectedObject.SetBoundingBox({static_cast<int>(std::round(topLeftX)), + static_cast<int>(std::round(topLeftY)), + static_cast<unsigned int>(botRightX - topLeftX), + static_cast<unsigned int>(botRightY - topLeftY)}); + + detectedObjects.emplace_back(detectedObject); + } + } + return detectedObjects; +} + +SSDResultDecoder::SSDResultDecoder(float ObjectThreshold) : m_objectThreshold(ObjectThreshold) {} + +}// namespace od
\ No newline at end of file diff --git a/samples/ObjectDetection/src/YoloResultDecoder.cpp b/samples/ObjectDetection/src/YoloResultDecoder.cpp new file mode 100644 index 0000000000..ffbf7cb68d --- /dev/null +++ b/samples/ObjectDetection/src/YoloResultDecoder.cpp @@ -0,0 +1,100 @@ +// +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "YoloResultDecoder.hpp" + +#include "NonMaxSuppression.hpp" + +#include <cassert> +#include <stdexcept> + +namespace od +{ + +DetectedObjects YoloResultDecoder::Decode(const InferenceResults& networkResults, + const Size& outputFrameSize, + const Size& resizedFrameSize, + const std::vector<std::string>& labels) +{ + + // Yolo v3 network outputs 1 tensor + if (networkResults.size() != 1) + { + throw std::runtime_error("Number of outputs from Yolo model doesn't equal 1"); + } + auto element_step = m_boxElements + m_confidenceElements + m_numClasses; + + float longEdgeInput = std::max(resizedFrameSize.m_Width, resizedFrameSize.m_Height); + float longEdgeOutput = std::max(outputFrameSize.m_Width, outputFrameSize.m_Height); + const float resizeFactor = longEdgeOutput/longEdgeInput; + + DetectedObjects detectedObjects; + DetectedObjects resultsAfterNMS; + + for (const InferenceResult& result : networkResults) + { + for (unsigned int i = 0; i < m_numBoxes; ++i) + { + const float* cur_box = &result[i * element_step]; + // Objectness score + if (cur_box[4] > m_objectThreshold) + { + for (unsigned int classIndex = 0; classIndex < m_numClasses; ++classIndex) + { + const float class_prob = cur_box[4] * cur_box[5 + classIndex]; + + // class confidence + + if (class_prob > m_ClsThreshold) + { + DetectedObject detectedObject; + + detectedObject.SetScore(class_prob); + + float topLeftX = cur_box[0] * resizeFactor; + float topLeftY = cur_box[1] * resizeFactor; + float botRightX = cur_box[2] * resizeFactor; + float botRightY = cur_box[3] * resizeFactor; + + assert(botRightX > topLeftX); + assert(botRightY > topLeftY); + + detectedObject.SetBoundingBox({static_cast<int>(topLeftX), + static_cast<int>(topLeftY), + static_cast<unsigned int>(botRightX-topLeftX), + static_cast<unsigned int>(botRightY-topLeftY)}); + if(labels.size() > classIndex) + { + detectedObject.SetLabel(labels.at(classIndex)); + } + else + { + detectedObject.SetLabel(std::to_string(classIndex)); + } + detectedObject.SetId(classIndex); + detectedObjects.emplace_back(detectedObject); + } + } + } + } + + std::vector<int> keepIndiciesAfterNMS = od::NonMaxSuppression(detectedObjects, m_NmsThreshold); + + for (const int ind: keepIndiciesAfterNMS) + { + resultsAfterNMS.emplace_back(detectedObjects[ind]); + } + } + + return resultsAfterNMS; +} + +YoloResultDecoder::YoloResultDecoder(float NMSThreshold, float ClsThreshold, float ObjectThreshold) + : m_NmsThreshold(NMSThreshold), m_ClsThreshold(ClsThreshold), m_objectThreshold(ObjectThreshold) {} + +}// namespace od + + + |