From d6cb30e7052891996efa41f608fffe4fa62d2094 Mon Sep 17 00:00:00 2001
From: Derek Lamberti
Date: Tue, 28 Apr 2020 13:31:29 +0100
Subject: Add test app to drive YoloV3 split model

Change-Id: I4877f170003d3a82f97311046c52571792ad735a
Signed-off-by: Derek Lamberti
---
 tests/CMakeLists.txt                        |   9 +
 tests/TfLiteYoloV3Big-Armnn/NMS.cpp         | 138 +++++++++
 tests/TfLiteYoloV3Big-Armnn/NMS.hpp         |  52 ++++
 .../TfLiteYoloV3Big-Armnn.cpp               | 335 +++++++++++++++++++++
 4 files changed, 534 insertions(+)
 create mode 100644 tests/TfLiteYoloV3Big-Armnn/NMS.cpp
 create mode 100644 tests/TfLiteYoloV3Big-Armnn/NMS.hpp
 create mode 100644 tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 014221b9c4..145af1225d 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -209,6 +209,15 @@ if (BUILD_TF_LITE_PARSER)
         ImagePreprocessor.hpp
         ImagePreprocessor.cpp)
     TfLiteParserTest(TfLiteMnasNet-Armnn "${TfLiteMnasNet-Armnn_sources}")
+
+
+    set(TfLiteYoloV3Big-Armnn_sources
+        TfLiteYoloV3Big-Armnn/NMS.cpp
+        TfLiteYoloV3Big-Armnn/NMS.hpp
+        TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp
+        ImagePreprocessor.hpp
+        ImagePreprocessor.cpp)
+    TfLiteParserTest(TfLiteYoloV3Big-Armnn "${TfLiteYoloV3Big-Armnn_sources}")
 endif()
 
 if (BUILD_ONNX_PARSER)
diff --git a/tests/TfLiteYoloV3Big-Armnn/NMS.cpp b/tests/TfLiteYoloV3Big-Armnn/NMS.cpp
new file mode 100644
index 0000000000..3ef840f875
--- /dev/null
+++ b/tests/TfLiteYoloV3Big-Armnn/NMS.cpp
@@ -0,0 +1,138 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "NMS.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <ostream>
+#include <vector>
+
+namespace yolov3 {
+namespace {
+/** Number of elements needed to represent a box */
+constexpr int box_elements = 4;
+/** Number of elements needed to represent a confidence factor */
+constexpr int confidence_elements = 1;
+
+/** Calculate Intersection Over Union of two boxes
+ *
+ * @param[in] box1 First box
+ * @param[in] box2 Second box
+ *
+ * @return The IoU of the two boxes
+ */
+float iou(const Box& box1, const Box& box2)
+{
+    const float area1 = (box1.xmax - box1.xmin) * (box1.ymax - box1.ymin);
+    const float area2 = (box2.xmax - box2.xmin) * (box2.ymax - box2.ymin);
+    float overlap;
+    if (area1 <= 0 || area2 <= 0)
+    {
+        overlap = 0.0f;
+    }
+    else
+    {
+        const auto y_min_intersection = std::max(box1.ymin, box2.ymin);
+        const auto x_min_intersection = std::max(box1.xmin, box2.xmin);
+        const auto y_max_intersection = std::min(box1.ymax, box2.ymax);
+        const auto x_max_intersection = std::min(box1.xmax, box2.xmax);
+        const auto area_intersection =
+            std::max(y_max_intersection - y_min_intersection, 0.0f) *
+            std::max(x_max_intersection - x_min_intersection, 0.0f);
+        overlap = area_intersection / (area1 + area2 - area_intersection);
+    }
+    return overlap;
+}
+
+/** Convert the raw detector output into detections above the confidence threshold */
+std::vector<Detection> convert_to_detections(const NMSConfig& config,
+                                             const std::vector<float>& detected_boxes)
+{
+    const size_t element_step = static_cast<size_t>(
+        box_elements + confidence_elements + config.num_classes);
+    std::vector<Detection> detections;
+
+    for (unsigned int i = 0; i < config.num_boxes; ++i)
+    {
+        const float* cur_box = &detected_boxes[i * element_step];
+        if (cur_box[4] > config.confidence_threshold)
+        {
+            Detection det;
+            det.box = {cur_box[0], cur_box[0] + cur_box[2], cur_box[1],
+                       cur_box[1] + cur_box[3]};
+            det.confidence = cur_box[4];
+            det.classes.resize(static_cast<size_t>(config.num_classes), 0);
+            for (unsigned int c = 0; c < config.num_classes; ++c)
+            {
+                const float class_prob = det.confidence * cur_box[5 + c];
+                if (class_prob > config.confidence_threshold)
+                {
+                    det.classes[c] = class_prob;
+                }
+            }
+            detections.emplace_back(std::move(det));
+        }
+    }
+    return detections;
+}
+} // namespace
+
+void print_detection(std::ostream& os,
+                     const std::vector<Detection>& detections)
+{
+    for (const auto& detection : detections)
+    {
+        for (unsigned int c = 0; c < detection.classes.size(); ++c)
+        {
+            if (detection.classes[c] != 0.0f)
+            {
+                os << c << " " << detection.classes[c] << " " << detection.box.xmin
+                   << " " << detection.box.ymin << " " << detection.box.xmax << " "
+                   << detection.box.ymax << std::endl;
+            }
+        }
+    }
+}
+
+std::vector<Detection> nms(const NMSConfig& config,
+                           const std::vector<float>& detected_boxes) {
+    // Get detections that comply with the expected confidence threshold
+    std::vector<Detection> detections =
+        convert_to_detections(config, detected_boxes);
+
+    const unsigned int num_detections = static_cast<unsigned int>(detections.size());
+    for (unsigned int c = 0; c < config.num_classes; ++c)
+    {
+        // Sort classes
+        std::sort(detections.begin(), detections.begin() + static_cast<std::ptrdiff_t>(num_detections),
+                  [c](Detection& detection1, Detection& detection2)
+                  {
+                      return (detection1.classes[c] - detection2.classes[c]) > 0;
+                  });
+        // Clear detections with high IoU
+        for (unsigned int d = 0; d < num_detections; ++d)
+        {
+            // Check if class is already cleared/invalidated
+            if (detections[d].classes[c] == 0.f)
+            {
+                continue;
+            }
+
+            // Filter out boxes on IoU threshold
+            const Box& box1 = detections[d].box;
+            for (unsigned int b = d + 1; b < num_detections; ++b)
+            {
+                const Box& box2 = detections[b].box;
+                if (iou(box1, box2) > config.iou_threshold)
+                {
+                    detections[b].classes[c] = 0.f;
+                }
+            }
+        }
+    }
+    return detections;
+}
+} // namespace yolov3
diff --git a/tests/TfLiteYoloV3Big-Armnn/NMS.hpp b/tests/TfLiteYoloV3Big-Armnn/NMS.hpp
new file mode 100644
index 0000000000..fc23994f58
--- /dev/null
+++ b/tests/TfLiteYoloV3Big-Armnn/NMS.hpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <ostream>
+#include <vector>
+
+namespace yolov3 {
+/** Non Maxima Suppression configuration meta-data */
+struct NMSConfig {
+    unsigned int num_classes{0};      /**< Number of classes in the detected boxes */
+    unsigned int num_boxes{0};        /**< Number of detected boxes */
+    float confidence_threshold{0.8f}; /**< Inclusion confidence threshold for a box */
+    float iou_threshold{0.8f};        /**< Inclusion threshold for Intersection-Over-Union */
+};
+
+/** Box representation structure */
+struct Box {
+    float xmin; /**< X position of the bottom-left coordinate */
+    float xmax; /**< X position of the top-right coordinate */
+    float ymin; /**< Y position of the bottom-left coordinate */
+    float ymax; /**< Y position of the top-right coordinate */
+};
+
+/** Detection structure */
+struct Detection {
+    Box box;                    /**< Detection box */
+    float confidence;           /**< Confidence of detection */
+    std::vector<float> classes; /**< Probability of classes */
+};
+
+/** Print identified yolo detections
+ *
+ * @param[in, out] os         Output stream to print to
+ * @param[in]      detections Detections to print
+ */
+void print_detection(std::ostream& os,
+                     const std::vector<Detection>& detections);
+
+/** Perform Non-Maxima Suppression on a list of given detections
+ *
+ * @param[in] config         Configuration metadata for NMS
+ * @param[in] detected_boxes Detected boxes
+ *
+ * @return A vector with the final detections
+ */
+std::vector<Detection> nms(const NMSConfig& config,
+                           const std::vector<float>& detected_boxes);
+} // namespace yolov3
diff --git a/tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp b/tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp
new file mode 100644
index 0000000000..c96d1f28d0
--- /dev/null
+++ b/tests/TfLiteYoloV3Big-Armnn/TfLiteYoloV3Big-Armnn.cpp
@@ -0,0 +1,335 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+//#include "../InferenceTest.hpp"
+//#include "../ImagePreprocessor.hpp"
+#include "armnnTfLiteParser/ITfLiteParser.hpp"
+
+#include "NMS.hpp"
+
+#include <stb/stb_image.h>
+
+#include <armnn/ArmNN.hpp>
+#include <armnn/BackendId.hpp>
+#include <armnn/Exceptions.hpp>
+#include <armnn/Logging.hpp>
+
+#include <chrono>
+#include <fstream>
+#include <iostream>
+
+using namespace armnnTfLiteParser;
+using namespace armnn;
+
+static const int OPEN_FILE_ERROR = -2;
+static const int OPTIMIZE_NETWORK_ERROR = -3;
+static const int LOAD_NETWORK_ERROR = -4;
+static const int LOAD_IMAGE_ERROR = -5;
+static const int GENERAL_ERROR = -100;
+
+#define CHECK_OK(v)                                     \
+    do {                                                \
+        try {                                           \
+            auto r_local = v;                           \
+            if (r_local != 0) { return r_local;}        \
+        }                                               \
+        catch (const armnn::Exception& e)               \
+        {                                               \
+            ARMNN_LOG(error) << "Oops: " << e.what();   \
+            return GENERAL_ERROR;                       \
+        }                                               \
+    } while(0)
+
+
+
+template <typename TContainer>
+inline armnn::InputTensors MakeInputTensors(const std::vector<armnn::BindingPointInfo>& inputBindings,
+                                            const std::vector<TContainer>& inputDataContainers)
+{
+    armnn::InputTensors inputTensors;
+
+    const size_t numInputs = inputBindings.size();
+    if (numInputs != inputDataContainers.size())
+    {
+        throw armnn::Exception("Mismatching vectors");
+    }
+
+    for (size_t i = 0; i < numInputs; i++)
+    {
+        const armnn::BindingPointInfo& inputBinding = inputBindings[i];
+        const TContainer& inputData = inputDataContainers[i];
+
+        armnn::ConstTensor inputTensor(inputBinding.second, inputData.data());
+        inputTensors.push_back(std::make_pair(inputBinding.first, inputTensor));
+    }
+
+    return inputTensors;
+}
+
+template <typename TContainer>
+inline armnn::OutputTensors MakeOutputTensors(const std::vector<armnn::BindingPointInfo>& outputBindings,
+                                              const std::vector<TContainer>& outputDataContainers)
+{
+    armnn::OutputTensors outputTensors;
+
+    const size_t numOutputs = outputBindings.size();
+    if (numOutputs != outputDataContainers.size())
+    {
+        throw armnn::Exception("Mismatching vectors");
+    }
+
+    for (size_t i = 0; i < numOutputs; i++)
+    {
+        const armnn::BindingPointInfo& outputBinding = outputBindings[i];
+        const TContainer& outputData = outputDataContainers[i];
+
+        armnn::Tensor outputTensor(outputBinding.second, const_cast<float*>(outputData.data()));
+        outputTensors.push_back(std::make_pair(outputBinding.first, outputTensor));
+    }
+
+    return outputTensors;
+}
+
+int LoadModel(const char* filename,
+              ITfLiteParser& parser,
+              IRuntime& runtime,
+              NetworkId& networkId,
+              const std::vector<BackendId>& backendPreferences)
+{
+    std::ifstream stream(filename, std::ios::in | std::ios::binary);
+    if (!stream.is_open())
+    {
+        ARMNN_LOG(error) << "Could not open model: " << filename;
+        return OPEN_FILE_ERROR;
+    }
+
+    std::vector<uint8_t> contents((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
+    stream.close();
+
+    auto model = parser.CreateNetworkFromBinary(contents);
+    contents.clear();
+    ARMNN_LOG(debug) << "Model loaded ok: " << filename;
+
+    // Optimize backbone model
+    auto optimizedModel = Optimize(*model, backendPreferences, runtime.GetDeviceSpec());
+    if (!optimizedModel)
+    {
+        ARMNN_LOG(fatal) << "Could not optimize the model: " << filename;
+        return OPTIMIZE_NETWORK_ERROR;
+    }
+
+    // Load backbone model into runtime
+    {
+        std::string errorMessage;
+        INetworkProperties modelProps;
+        Status status = runtime.LoadNetwork(networkId, std::move(optimizedModel), errorMessage, modelProps);
+        if (status != Status::Success)
+        {
+            ARMNN_LOG(fatal) << "Could not load " << filename << " model into runtime: " << errorMessage;
+            return LOAD_NETWORK_ERROR;
+        }
+    }
+
+    return 0;
+}
+
+std::vector<float> LoadImage(const char* filename)
+{
+    struct Memory
+    {
+        ~Memory() {stbi_image_free(m_Data);}
+        bool IsLoaded() const { return m_Data != nullptr;}
+
+        unsigned char* m_Data;
+    };
+
+    std::vector<float> image;
+
+    int width;
+    int height;
+    int channels;
+
+    Memory mem = {stbi_load(filename, &width, &height, &channels, 3)};
+    if (!mem.IsLoaded())
+    {
+        ARMNN_LOG(error) << "Could not load input image file: " << filename;
+        return image;
+    }
+
+    if (width != 1920 || height != 1080 || channels != 3)
+    {
+        ARMNN_LOG(error) << "Input image has wrong dimension: " << width << "x" << height << "x" << channels << ". "
+                            " Expected 1920x1080x3.";
+        return image;
+    }
+
+    image.resize(1920*1080*3);
+
+    // Expand to float. Does this need de-gamma?
+    for (unsigned int idx=0; idx < 1920*1080*3; idx++)
+    {
+        image[idx] = static_cast<float>(mem.m_Data[idx]) / 255.0f;
+    }
+
+    return image;
+}
+
+int main(int argc, char* argv[])
+{
+    if (argc != 3)
+    {
+        ARMNN_LOG(error) << "Expected arguments: {PathToModels} {PathToData}";
+        return GENERAL_ERROR;
+    }
+    std::string modelsPath(argv[1]);
+    std::string imagePath(argv[2]);
+
+    std::string backboneModelFile = modelsPath + "yolov3_1080_1920_backbone_int8.tflite";
+    std::string detectorModelFile = modelsPath + "yolov3_1080_1920_detector_fp32.tflite";
+    std::string imageFile = imagePath + "1080_1920.jpg";
+
+    // Configure the logging
+    SetAllLoggingSinks(true, true, true);
+    SetLogFilter(LogSeverity::Trace);
+
+
+    // Create runtime
+    IRuntime::CreationOptions runtimeOptions; // default
+    auto runtime = IRuntime::Create(runtimeOptions);
+    if (!runtime)
+    {
+        ARMNN_LOG(fatal) << "Could not create runtime.";
+        return -1;
+    }
+
+    // Create TfLite Parsers
+    ITfLiteParser::TfLiteParserOptions parserOptions;
+    auto parser = ITfLiteParser::Create(parserOptions);
+
+    // Load backbone model
+    ARMNN_LOG(info) << "Loading backbone...";
+    NetworkId backboneId;
+    CHECK_OK(LoadModel(backboneModelFile.c_str(), *parser, *runtime, backboneId, {"GpuAcc", "CpuRef"}));
+    auto inputId  = parser->GetNetworkInputBindingInfo(0, "inputs");
+    auto bbOut0Id = parser->GetNetworkOutputBindingInfo(0, "input_to_detector_1");
+    auto bbOut1Id = parser->GetNetworkOutputBindingInfo(0, "input_to_detector_2");
+    auto bbOut2Id = parser->GetNetworkOutputBindingInfo(0, "input_to_detector_3");
+    auto backboneProfile = runtime->GetProfiler(backboneId);
+    backboneProfile->EnableProfiling(true);
+
+    // Load detector model
+    ARMNN_LOG(info) << "Loading detector...";
+    NetworkId detectorId;
+    CHECK_OK(LoadModel(detectorModelFile.c_str(), *parser, *runtime, detectorId, {"CpuAcc", "CpuRef"}));
+    auto detectIn0Id = parser->GetNetworkInputBindingInfo(0, "input_to_detector_1");
+    auto detectIn1Id = parser->GetNetworkInputBindingInfo(0, "input_to_detector_2");
+    auto detectIn2Id = parser->GetNetworkInputBindingInfo(0, "input_to_detector_3");
+    auto outputBoxesId = parser->GetNetworkOutputBindingInfo(0, "output_boxes");
+    auto detectorProfile = runtime->GetProfiler(detectorId);
+
+    // Load input from file
+    ARMNN_LOG(info) << "Loading test image...";
+    auto image = LoadImage(imageFile.c_str());
+    if (image.empty())
+    {
+        return LOAD_IMAGE_ERROR;
+    }
+
+
+    // Allocate the intermediate tensors
+    std::vector<float> intermediateMem0(bbOut0Id.second.GetNumElements());
+    std::vector<float> intermediateMem1(bbOut1Id.second.GetNumElements());
+    std::vector<float> intermediateMem2(bbOut2Id.second.GetNumElements());
+    std::vector<float> intermediateMem3(outputBoxesId.second.GetNumElements());
+
+    // Setup inputs and outputs
+    using BindingInfos = std::vector<armnn::BindingPointInfo>;
+    using FloatTensors = std::vector<std::vector<float>>;
+
+    InputTensors bbInputTensors = MakeInputTensors(BindingInfos{inputId},
+                                                   FloatTensors{std::move(image)});
+    OutputTensors bbOutputTensors = MakeOutputTensors(BindingInfos{bbOut0Id, bbOut1Id, bbOut2Id},
+                                                      FloatTensors{intermediateMem0,
+                                                                   intermediateMem1,
+                                                                   intermediateMem2});
+    InputTensors detectInputTensors = MakeInputTensors(BindingInfos{detectIn0Id,
+                                                                    detectIn1Id,
+                                                                    detectIn2Id},
+                                                       FloatTensors{intermediateMem0,
+                                                                    intermediateMem1,
+                                                                    intermediateMem2});
+    OutputTensors detectOutputTensors = MakeOutputTensors(BindingInfos{outputBoxesId},
+                                                          FloatTensors{intermediateMem3});
+
+    static const int numIterations = 2;
+    using DurationUS = std::chrono::duration<double, std::micro>;
+    std::vector<DurationUS> nmsDurations;
+    nmsDurations.reserve(numIterations);
+    for (int i = 0; i < numIterations; i++)
+    {
+        // Execute backbone
+        ARMNN_LOG(info) << "Running backbone...";
+        runtime->EnqueueWorkload(backboneId, bbInputTensors, bbOutputTensors);
+
+        // Execute detector
+        ARMNN_LOG(info) << "Running detector...";
+        runtime->EnqueueWorkload(detectorId, detectInputTensors, detectOutputTensors);
+
+        // Execute NMS
+        ARMNN_LOG(info) << "Running nms...";
+        using clock = std::chrono::steady_clock;
+        auto nmsStartTime = clock::now();
+        yolov3::NMSConfig config;
+        config.num_boxes = 127800;
+        config.num_classes = 80;
+        config.confidence_threshold = 0.9f;
+        config.iou_threshold = 0.5f;
+        auto filtered_boxes = yolov3::nms(config, intermediateMem3);
+        auto nmsEndTime = clock::now();
+
+        // Only print detections and record NMS timings after the warm-up run
+        if (i > 0)
+        {
+            print_detection(std::cout, filtered_boxes);
+
+            const auto nmsDuration = DurationUS(nmsEndTime - nmsStartTime);
+            nmsDurations.push_back(nmsDuration);
+        }
+        // Enable the profiling after the warm-up run
+        backboneProfile->EnableProfiling(true);
+        detectorProfile->EnableProfiling(true);
+    }
+    // Log timings to file
+    std::ofstream backboneProfileStream("backbone.json");
+    backboneProfile->Print(backboneProfileStream);
+    backboneProfileStream.close();
+
+    std::ofstream detectorProfileStream("detector.json");
+    detectorProfile->Print(detectorProfileStream);
+    detectorProfileStream.close();
+
+    // Manually construct the json output
+    std::ofstream nmsProfileStream("nms.json");
+    nmsProfileStream << "{" << "\n";
+    nmsProfileStream << R"(  "NmsTimings": {)" << "\n";
+    nmsProfileStream << R"(    "raw": [)" << "\n";
+    bool isFirst = true;
+    for (auto duration : nmsDurations)
+    {
+        if (!isFirst)
+        {
+            nmsProfileStream << ",\n";
+        }
+
+        nmsProfileStream << "      " << duration.count();
+        isFirst = false;
+    }
+    nmsProfileStream << "\n";
+    nmsProfileStream << "    ]," << "\n";
+    nmsProfileStream << R"(    "units": "us")" << "\n";
+    nmsProfileStream << "  }" << "\n";
+    nmsProfileStream << "}" << "\n";
+    nmsProfileStream.close();
+
+    ARMNN_LOG(info) << "Run completed";
+    return 0;
+}
\ No newline at end of file
-- 
cgit v1.2.1