From d6cb30e7052891996efa41f608fffe4fa62d2094 Mon Sep 17 00:00:00 2001
From: Derek Lamberti
Date: Tue, 28 Apr 2020 13:31:29 +0100
Subject: Add test app to drive YoloV3 split model

Change-Id: I4877f170003d3a82f97311046c52571792ad735a
Signed-off-by: Derek Lamberti
---
 tests/CMakeLists.txt                        |   9 +
 tests/TfLiteYoloV3Big-Armnn/NMS.cpp         | 138 +++++++++
 tests/TfLiteYoloV3Big-Armnn/NMS.hpp         |  52 ++++
 .../TfLiteYoloV3Big-Armnn.cpp               | 335 +++++++++++++++++++++
 4 files changed, 534 insertions(+)
 create mode 100644 tests/TfLiteYoloV3Big-Armnn/NMS.cpp
 create mode 100644 tests/TfLiteYoloV3Big-Armnn/NMS.hpp
 create mode 100644 tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 014221b9c4..145af1225d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -209,6 +209,15 @@ if (BUILD_TF_LITE_PARSER)
         ImagePreprocessor.hpp
         ImagePreprocessor.cpp)
     TfLiteParserTest(TfLiteMnasNet-Armnn "${TfLiteMnasNet-Armnn_sources}")
+
+
+    set(TfLiteYoloV3Big-Armnn_sources
+        TfLiteYoloV3Big-Armnn/NMS.cpp
+        TfLiteYoloV3Big-Armnn/NMS.hpp
+        TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp
+        ImagePreprocessor.hpp
+        ImagePreprocessor.cpp)
+    TfLiteParserTest(TfLiteYoloV3Big-Armnn "${TfLiteYoloV3Big-Armnn_sources}")
 endif()
 
 if (BUILD_ONNX_PARSER)
diff --git a/tests/TfLiteYoloV3Big-Armnn/NMS.cpp b/tests/TfLiteYoloV3Big-Armnn/NMS.cpp
new file mode 100644
index 0000000000..3ef840f875
--- /dev/null
+++ b/tests/TfLiteYoloV3Big-Armnn/NMS.cpp
@@ -0,0 +1,138 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "NMS.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <ostream>
+#include <vector>
+
+namespace yolov3 {
+namespace {
+/** Number of elements needed to represent a box */
+constexpr int box_elements = 4;
+/** Number of elements needed to represent a confidence factor */
+constexpr int confidence_elements = 1;
+
+/** Calculate Intersection Over Union of two boxes
+ *
+ * @param[in] box1 First box
+ * @param[in] box2 Second box
+ *
+ * @return The IoU of the two boxes
+ */
+float iou(const Box& box1, const Box& box2)
+{
+    const float area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin);
+    const float area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin);
+    float overlap;
+    if (area1 <= 0 || area2 <= 0)
+    {
+        overlap = 0.0f;
+    }
+    else
+    {
+        const auto y_min_intersection = std::max(box1.ymin, box2.ymin);
+        const auto x_min_intersection = std::max(box1.xmin, box2.xmin);
+        const auto y_max_intersection = std::min(box1.ymax, box2.ymax);
+        const auto x_max_intersection = std::min(box1.xmax, box2.xmax);
+        const auto area_intersection =
+            std::max(y_max_intersection - y_min_intersection, 0.0f) *
+            std::max(x_max_intersection - x_min_intersection, 0.0f);
+        overlap = area_intersection / (area1 + area2 - area_intersection);
+    }
+    return overlap;
+}
+
+/** Convert the raw detector output into detections above the confidence threshold */
+std::vector<Detection> convert_to_detections(const NMSConfig& config,
+                                             const std::vector<float>& detected_boxes)
+{
+    const size_t element_step = static_cast<size_t>(
+        box_elements + confidence_elements + config.num_classes);
+    std::vector<Detection> detections;
+
+    for (unsigned int i = 0; i < config.num_boxes; ++i)
+    {
+        const float* cur_box = &detected_boxes[i * element_step];
+        if (cur_box[4] > config.confidence_threshold)
+        {
+            Detection det;
+            det.box = {cur_box[0], cur_box[0] + cur_box[2], cur_box[1],
+                       cur_box[1] + cur_box[3]};
+            det.confidence = cur_box[4];
+            det.classes.resize(static_cast<size_t>(config.num_classes), 0);
+            for (unsigned int c = 0; c < config.num_classes; ++c)
+            {
+                const float class_prob = det.confidence * cur_box[5 + c];
+                if (class_prob > config.confidence_threshold)
+                {
+                    det.classes[c] = class_prob;
+                }
+            }
+            detections.emplace_back(std::move(det));
+        }
+    }
+    return detections;
+}
+} // namespace
+
+void print_detection(std::ostream& os,
+                     const std::vector<Detection>& detections)
+{
+    for (const auto& detection : detections)
+    {
+        for (unsigned int c = 0; c < detection.classes.size(); ++c)
+        {
+            if (detection.classes[c] != 0.0f)
+            {
+                os << c << " " << detection.classes[c] << " " << detection.box.xmin
+                   << " " << detection.box.ymin << " " << detection.box.xmax << " "
+                   << detection.box.ymax << std::endl;
+            }
+        }
+    }
+}
+
+std::vector<Detection> nms(const NMSConfig& config,
+                           const std::vector<float>& detected_boxes) {
+    // Get detections that comply with the expected confidence threshold
+    std::vector<Detection> detections =
+        convert_to_detections(config, detected_boxes);
+
+    const unsigned int num_detections = static_cast<unsigned int>(detections.size());
+    for (unsigned int c = 0; c < config.num_classes; ++c)
+    {
+        // Sort classes
+        std::sort(detections.begin(), detections.begin() + static_cast<std::ptrdiff_t>(num_detections),
+                  [c](Detection& detection1, Detection& detection2)
+                  {
+                      return (detection1.classes[c] - detection2.classes[c]) > 0;
+                  });
+        // Clear detections with high IoU
+        for (unsigned int d = 0; d < num_detections; ++d)
+        {
+            // Check if class is already cleared/invalidated
+            if (detections[d].classes[c] == 0.f)
+            {
+                continue;
+            }
+
+            // Filter out boxes on IoU threshold
+            const Box& box1 = detections[d].box;
+            for (unsigned int b = d + 1; b < num_detections; ++b)
+            {
+                const Box& box2 = detections[b].box;
+                if (iou(box1, box2) > config.iou_threshold)
+                {
+                    detections[b].classes[c] = 0.f;
+                }
+            }
+        }
+    }
+    return detections;
+}
+} // namespace yolov3
diff --git a/tests/TfLiteYoloV3Big-Armnn/NMS.hpp b/tests/TfLiteYoloV3Big-Armnn/NMS.hpp
new file mode 100644
index 0000000000..fc23994f58
--- /dev/null
+++ b/tests/TfLiteYoloV3Big-Armnn/NMS.hpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <ostream>
+#include <vector>
+
+namespace yolov3 {
+/** Non Maxima Suppression configuration meta-data */
+struct NMSConfig {
+    unsigned int num_classes{0};      /**< Number of classes in the detected boxes */
+    unsigned int num_boxes{0};        /**< Number of detected boxes */
+    float confidence_threshold{0.8f}; /**< Inclusion confidence threshold for a box */
+    float iou_threshold{0.8f};        /**< Inclusion threshold for Intersection-Over-Union */
+};
+
+/** Box representation structure */
+struct Box {
+    float xmin; /**< X position of the bottom-left coordinate */
+    float xmax; /**< X position of the top-right coordinate */
+    float ymin; /**< Y position of the bottom-left coordinate */
+    float ymax; /**< Y position of the top-right coordinate */
+};
+
+/** Detection structure */
+struct Detection {
+    Box box;                    /**< Detection box */
+    float confidence;           /**< Confidence of detection */
+    std::vector<float> classes; /**< Probability of classes */
+};
+
+/** Print identified yolo detections
+ *
+ * @param[in, out] os         Output stream to print to
+ * @param[in]      detections Detections to print
+ */
+void print_detection(std::ostream& os,
+                     const std::vector<Detection>& detections);
+
+/** Perform Non-Maxima Suppression on a list of given detections
+ *
+ * @param[in] config         Configuration metadata for NMS
+ * @param[in] detected_boxes Detected boxes
+ *
+ * @return A vector with the final detections
+ */
+std::vector<Detection> nms(const NMSConfig& config,
+                           const std::vector<float>& detected_boxes);
+} // namespace yolov3
diff --git a/tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp b/tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp
new file mode 100644
index 0000000000..c96d1f28d0
--- /dev/null
+++ b/tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp
@@ -0,0 +1,335 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+//#include "../InferenceTest.hpp"
+//#include "../ImagePreprocessor.hpp"
+#include "armnnTfLiteParser/ITfLiteParser.hpp"
+
+#include "NMS.hpp"
+
+#include <stb/stb_image.h>
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/BackendId.hpp>
+#include <armnn/Exceptions.hpp>
+#include <armnn/Logging.hpp>
+
+#include <chrono>
+#include <fstream>
+#include <iostream>
+
+using namespace armnnTfLiteParser;
+using namespace armnn;
+
+static const int OPEN_FILE_ERROR = -2;
+static const int OPTIMIZE_NETWORK_ERROR = -3;
+static const int LOAD_NETWORK_ERROR = -4;
+static const int LOAD_IMAGE_ERROR = -5;
+static const int GENERAL_ERROR = -100;
+
+#define CHECK_OK(v)                                     \
+    do {                                                \
+        try {                                           \
+            auto r_local = v;                           \
+            if (r_local != 0) { return r_local;}        \
+        }                                               \
+        catch (const armnn::Exception& e)               \
+        {                                               \
+            ARMNN_LOG(error) << "Oops: " << e.what();   \
+            return GENERAL_ERROR;                       \
+        }                                               \
+    } while(0)
+
+
+
+template <typename TContainer>
+inline armnn::InputTensors MakeInputTensors(const std::vector<armnn::BindingPointInfo>& inputBindings,
+                                            const std::vector<TContainer>& inputDataContainers)
+{
+    armnn::InputTensors inputTensors;
+
+    const size_t numInputs = inputBindings.size();
+    if (numInputs != inputDataContainers.size())
+    {
+        throw armnn::Exception("Mismatching vectors");
+    }
+
+    for (size_t i = 0; i < numInputs; i++)
+    {
+        const armnn::BindingPointInfo& inputBinding = inputBindings[i];
+        const TContainer& inputData = inputDataContainers[i];
+
+        armnn::ConstTensor inputTensor(inputBinding.second, inputData.data());
+        inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
+    }
+
+    return inputTensors;
+}
+
+template <typename TContainer>
+inline armnn::OutputTensors MakeOutputTensors(const std::vector<armnn::BindingPointInfo>& outputBindings,
+                                              const std::vector<TContainer>& outputDataContainers)
+{
+    armnn::OutputTensors outputTensors;
+
+    const size_t numOutputs = outputBindings.size();
+    if (numOutputs != outputDataContainers.size())
+    {
+        throw armnn::Exception("Mismatching vectors");
+    }
+
+    for (size_t i = 0; i < numOutputs; i++)
+    {
+        const armnn::BindingPointInfo& outputBinding = outputBindings[i];
+        const TContainer& outputData = outputDataContainers[i];
+
+        armnn::Tensor outputTensor(outputBinding.second, const_cast<float*>(outputData.data()));
+        outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
+    }
+
+    return outputTensors;
+}
+
+int LoadModel(const char* filename,
+              ITfLiteParser& parser,
+              IRuntime& runtime,
+              NetworkId& networkId,
+              const std::vector<BackendId>& backendPreferences)
+{
+    std::ifstream stream(filename, std::ios::in | std::ios::binary);
+    if (!stream.is_open())
+    {
+        ARMNN_LOG(error) << "Could not open model: " << filename;
+        return OPEN_FILE_ERROR;
+    }
+
+    std::vector<uint8_t> contents((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
+    stream.close();
+
+    auto model = parser.CreateNetworkFromBinary(contents);
+    contents.clear();
+    ARMNN_LOG(debug) << "Model loaded ok: " << filename;
+
+    // Optimize backbone model
+    auto optimizedModel = Optimize(*model, backendPreferences, runtime.GetDeviceSpec());
+    if (!optimizedModel)
+    {
+        ARMNN_LOG(fatal) << "Could not optimize the model: " << filename;
+        return OPTIMIZE_NETWORK_ERROR;
+    }
+
+    // Load backbone model into runtime
+    {
+        std::string errorMessage;
+        INetworkProperties modelProps;
+        Status status = runtime.LoadNetwork(networkId, std::move(optimizedModel), errorMessage, modelProps);
+        if (status != Status::Success)
+        {
+            ARMNN_LOG(fatal) << "Could not load " << filename << " model into runtime: " << errorMessage;
+            return LOAD_NETWORK_ERROR;
+        }
+    }
+
+    return 0;
+}
+
+std::vector<float> LoadImage(const char* filename)
+{
+    struct Memory
+    {
+        ~Memory() {stbi_image_free(m_Data);}
+        bool IsLoaded() const { return m_Data != nullptr;}
+
+        unsigned char* m_Data;
+    };
+
+    std::vector<float> image;
+
+    int width;
+    int height;
+    int channels;
+
+    Memory mem = {stbi_load(filename, &width, &height, &channels, 3)};
+    if (!mem.IsLoaded())
+    {
+        ARMNN_LOG(error) << "Could not load input image file: " << filename;
+        return image;
+    }
+
+    if (width != 1920 || height != 1080 || channels != 3)
+    {
+        ARMNN_LOG(error) << "Input image has wrong dimension: " << width << "x" << height << "x" << channels << ". "
+                            " Expected 1920x1080x3.";
+        return image;
+    }
+
+    image.resize(1920*1080*3);
+
+    // Expand to float. Does this need de-gamma?
+    for (unsigned int idx=0; idx < 1920*1080*3; idx++)
+    {
+        image[idx] = static_cast<float>(mem.m_Data[idx]) / 255.0f;
+    }
+
+    return image;
+}
+
+int main(int argc, char* argv[])
+{
+    if (argc != 3)
+    {
+        ARMNN_LOG(error) << "Expected arguments: {PathToModels} {PathToData}";
+        return GENERAL_ERROR;
+    }
+    std::string modelsPath(argv[1]);
+    std::string imagePath(argv[2]);
+
+    std::string backboneModelFile = modelsPath + "yolov3_1080_1920_backbone_int8.tflite";
+    std::string detectorModelFile = modelsPath + "yolov3_1080_1920_detector_fp32.tflite";
+    std::string imageFile = imagePath + "1080_1920.jpg";
+
+    // Configure the logging
+    SetAllLoggingSinks(true, true, true);
+    SetLogFilter(LogSeverity::Trace);
+
+
+    // Create runtime
+    IRuntime::CreationOptions runtimeOptions; // default
+    auto runtime = IRuntime::Create(runtimeOptions);
+    if (!runtime)
+    {
+        ARMNN_LOG(fatal) << "Could not create runtime.";
+        return -1;
+    }
+
+    // Create TfLite Parsers
+    ITfLiteParser::TfLiteParserOptions parserOptions;
+    auto parser = ITfLiteParser::Create(parserOptions);
+
+    // Load backbone model
+    ARMNN_LOG(info) << "Loading backbone...";
+    NetworkId backboneId;
+    CHECK_OK(LoadModel(backboneModelFile.c_str(), *parser, *runtime, backboneId, {"GpuAcc", "CpuRef"}));
+    auto inputId  = parser->GetNetworkInputBindingInfo(0, "inputs");
+    auto bbOut0Id = parser->GetNetworkOutputBindingInfo(0, "input_to_detector_1");
+    auto bbOut1Id = parser->GetNetworkOutputBindingInfo(0, "input_to_detector_2");
+    auto bbOut2Id = parser->GetNetworkOutputBindingInfo(0, "input_to_detector_3");
+    auto backboneProfile = runtime->GetProfiler(backboneId);
+    backboneProfile->EnableProfiling(true);
+
+    // Load detector model
+    ARMNN_LOG(info) << "Loading detector...";
+    NetworkId detectorId;
+    CHECK_OK(LoadModel(detectorModelFile.c_str(), *parser, *runtime, detectorId, {"CpuAcc", "CpuRef"}));
+    auto detectIn0Id = parser->GetNetworkInputBindingInfo(0, "input_to_detector_1");
+    auto detectIn1Id = parser->GetNetworkInputBindingInfo(0, "input_to_detector_2");
+    auto detectIn2Id = parser->GetNetworkInputBindingInfo(0, "input_to_detector_3");
+    auto outputBoxesId = parser->GetNetworkOutputBindingInfo(0, "output_boxes");
+    auto detectorProfile = runtime->GetProfiler(detectorId);
+
+    // Load input from file
+    ARMNN_LOG(info) << "Loading test image...";
+    auto image = LoadImage(imageFile.c_str());
+    if (image.empty())
+    {
+        return LOAD_IMAGE_ERROR;
+    }
+
+
+    // Allocate the intermediate tensors
+    std::vector<float> intermediateMem0(bbOut0Id.second.GetNumElements());
+    std::vector<float> intermediateMem1(bbOut1Id.second.GetNumElements());
+    std::vector<float> intermediateMem2(bbOut2Id.second.GetNumElements());
+    std::vector<float> intermediateMem3(outputBoxesId.second.GetNumElements());
+
+    // Setup inputs and outputs
+    using BindingInfos = std::vector<armnn::BindingPointInfo>;
+    using FloatTensors = std::vector<std::vector<float>>;
+
+    InputTensors bbInputTensors = MakeInputTensors(BindingInfos{inputId},
+                                                   FloatTensors{std::move(image)});
+    OutputTensors bbOutputTensors = MakeOutputTensors(BindingInfos{bbOut0Id, bbOut1Id, bbOut2Id},
+                                                      FloatTensors{intermediateMem0,
+                                                                   intermediateMem1,
+                                                                   intermediateMem2});
+    InputTensors detectInputTensors = MakeInputTensors(BindingInfos{detectIn0Id,
+                                                                    detectIn1Id,
+                                                                    detectIn2Id},
+                                                       FloatTensors{intermediateMem0,
+                                                                    intermediateMem1,
+                                                                    intermediateMem2});
+    OutputTensors detectOutputTensors = MakeOutputTensors(BindingInfos{outputBoxesId},
+                                                          FloatTensors{intermediateMem3});
+
+    static const int numIterations = 2;
+    using DurationUS = std::chrono::duration<double, std::micro>;
+    std::vector<DurationUS> nmsDurations;
+    nmsDurations.reserve(numIterations);
+    for (int i = 0; i < numIterations; i++)
+    {
+        // Execute backbone
+        ARMNN_LOG(info) << "Running backbone...";
+        runtime->EnqueueWorkload(backboneId, bbInputTensors, bbOutputTensors);
+
+        // Execute detector
+        ARMNN_LOG(info) << "Running detector...";
+        runtime->EnqueueWorkload(detectorId, detectInputTensors, detectOutputTensors);
+
+        // Execute NMS
+        ARMNN_LOG(info) << "Running nms...";
+        using clock = std::chrono::steady_clock;
+        auto nmsStartTime = clock::now();
+        yolov3::NMSConfig config;
+        config.num_boxes = 127800;
+        config.num_classes = 80;
+        config.confidence_threshold = 0.9f;
+        config.iou_threshold = 0.5f;
+        auto filtered_boxes = yolov3::nms(config, intermediateMem3);
+        auto nmsEndTime = clock::now();
+
+        // Only print detections and record NMS timings after the warm-up run
+        if (i > 0)
+        {
+            print_detection(std::cout, filtered_boxes);
+
+            const auto nmsDuration = DurationUS(nmsEndTime - nmsStartTime);
+            nmsDurations.push_back(nmsDuration);
+        }
+        // Enable the profiling after the warm-up run
+        backboneProfile->EnableProfiling(true);
+        detectorProfile->EnableProfiling(true);
+    }
+    // Log timings to file
+    std::ofstream backboneProfileStream("backbone.json");
+    backboneProfile->Print(backboneProfileStream);
+    backboneProfileStream.close();
+
+    std::ofstream detectorProfileStream("detector.json");
+    detectorProfile->Print(detectorProfileStream);
+    detectorProfileStream.close();
+
+    // Manually construct the json output
+    std::ofstream nmsProfileStream("nms.json");
+    nmsProfileStream << "{" << "\n";
+    nmsProfileStream << R"(  "NmsTimings": {)" << "\n";
+    nmsProfileStream << R"(    "raw": [)" << "\n";
+    bool isFirst = true;
+    for (auto duration : nmsDurations)
+    {
+        if (!isFirst)
+        {
+            nmsProfileStream << ",\n";
+        }
+
+        nmsProfileStream << "      " << duration.count();
+        isFirst = false;
+    }
+    nmsProfileStream << "\n";
+    nmsProfileStream << "    ]," << "\n";
+    nmsProfileStream << R"(    "units": "us")" << "\n";
+    nmsProfileStream << "  }" << "\n";
+    nmsProfileStream << "}" << "\n";
+    nmsProfileStream.close();
+
+    ARMNN_LOG(info) << "Run completed";
+    return 0;
+}
\ No newline at end of file
-- 
cgit v1.2.1