author    Isabella Gottardi <isabella.gottardi@arm.com>  2022-01-27 16:39:37 +0000
committer Kshitij Sisodia <kshitij.sisodia@arm.com>      2022-02-08 16:32:28 +0000
commit    3107aa2152de9be8317e62da1d0327bcad6552e2 (patch)
tree      2ba12a5dd39f28ae1b646e132fbe575c6a442ee9 /source/use_case
parent    5cdfa9b834dc5a94c70f9f2b1f5c849dc5439e85 (diff)
download  ml-embedded-evaluation-kit-3107aa2152de9be8317e62da1d0327bcad6552e2.tar.gz
MLECO-2873: Object detection usecase follow-up
Change-Id: Ic14e93a50fb7b3f3cfd9497bac1280794cc0fc15
Signed-off-by: Isabella Gottardi <isabella.gottardi@arm.com>
Diffstat (limited to 'source/use_case')
-rw-r--r--                source/use_case/img_class/src/MainLoop.cc                            |   6
-rw-r--r--                source/use_case/object_detection/include/DetectionResult.hpp        |  32
-rw-r--r--                source/use_case/object_detection/include/DetectionUseCaseUtils.hpp  |  72
-rw-r--r--                source/use_case/object_detection/include/DetectorPostProcessing.hpp | 249
-rw-r--r--                source/use_case/object_detection/include/YoloFastestModel.hpp       |   5
-rw-r--r--                source/use_case/object_detection/src/DetectionUseCaseUtils.cc       | 105
-rw-r--r-- [-rwxr-xr-x]   source/use_case/object_detection/src/DetectorPostProcessing.cc      | 795
-rw-r--r--                source/use_case/object_detection/src/MainLoop.cc                    |  30
-rw-r--r--                source/use_case/object_detection/src/UseCaseHandler.cc              |  88
-rw-r--r--                source/use_case/object_detection/usecase.cmake                      |  32
-rw-r--r--                source/use_case/vww/include/VisualWakeWordModel.hpp                  |   8
-rw-r--r--                source/use_case/vww/src/UseCaseHandler.cc                            |  26
12 files changed, 698 insertions, 750 deletions
diff --git a/source/use_case/img_class/src/MainLoop.cc b/source/use_case/img_class/src/MainLoop.cc
index ea9f14a..cab360b 100644
--- a/source/use_case/img_class/src/MainLoop.cc
+++ b/source/use_case/img_class/src/MainLoop.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021 - 2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -46,7 +46,7 @@ void main_loop(hal_platform& platform)
ImgClassClassifier classifier; /* Classifier wrapper object. */
caseContext.Set<arm::app::Classifier&>("classifier", classifier);
- std::vector <std::string> labels;
+ std::vector<std::string> labels;
GetLabelsVector(labels);
caseContext.Set<const std::vector <std::string>&>("labels", labels);
@@ -88,4 +88,4 @@ void main_loop(hal_platform& platform)
}
} while (executionSuccessful && bUseMenu);
info("Main loop terminated.\n");
-} \ No newline at end of file
+}
diff --git a/source/use_case/object_detection/include/DetectionResult.hpp b/source/use_case/object_detection/include/DetectionResult.hpp
index 78895f7..aa74d90 100644
--- a/source/use_case/object_detection/include/DetectionResult.hpp
+++ b/source/use_case/object_detection/include/DetectionResult.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -20,31 +20,41 @@
namespace arm {
namespace app {
+namespace object_detection {
/**
* @brief Class representing a single detection result.
*/
class DetectionResult {
public:
- double m_normalisedVal{0.0};
- int m_x0{0};
- int m_y0{0};
- int m_w{0};
- int m_h{0};
-
- DetectionResult() = default;
- ~DetectionResult() = default;
-
+ /**
+ * @brief Constructor
+ * @param[in] normalisedVal Result normalized value
+ * @param[in] x0 Top corner x starting point
+ * @param[in] y0 Top corner y starting point
+ * @param[in] w Detection result width
+ * @param[in] h Detection result height
+ **/
DetectionResult(double normalisedVal,int x0,int y0, int w,int h) :
m_normalisedVal(normalisedVal),
m_x0(x0),
m_y0(y0),
m_w(w),
- m_h(h)
+ m_h(h)
{
}
+
+ DetectionResult() = default;
+ ~DetectionResult() = default;
+
+ double m_normalisedVal{0.0};
+ int m_x0{0};
+ int m_y0{0};
+ int m_w{0};
+ int m_h{0};
};
+} /* namespace object_detection */
} /* namespace app */
} /* namespace arm */
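A quick usage sketch for the relocated class (hypothetical host-side snippet, not part of this patch; assumes DetectionResult.hpp is on the include path):

#include "DetectionResult.hpp"

#include <cstdio>
#include <vector>

int main()
{
    /* Construct a result the way the post-processing stage does: a
     * normalised score plus a pixel-space box (x0, y0, width, height). */
    arm::app::object_detection::DetectionResult res(0.87, 10, 20, 64, 48);

    std::vector<arm::app::object_detection::DetectionResult> results;
    results.push_back(res);

    for (const auto& r : results) {
        printf("(%f) -> Detection box: {x=%d,y=%d,w=%d,h=%d}\n",
               r.m_normalisedVal, r.m_x0, r.m_y0, r.m_w, r.m_h);
    }
    return 0;
}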
diff --git a/source/use_case/object_detection/include/DetectionUseCaseUtils.hpp b/source/use_case/object_detection/include/DetectionUseCaseUtils.hpp
deleted file mode 100644
index 8ef48ac..0000000
--- a/source/use_case/object_detection/include/DetectionUseCaseUtils.hpp
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DETECTION_USE_CASE_UTILS_HPP
-#define DETECTION_USE_CASE_UTILS_HPP
-
-#include "hal.h"
-#include "DetectionResult.hpp"
-#include "UseCaseHandler.hpp" /* Handlers for different user options. */
-#include <inttypes.h>
-#include <vector>
-
-
-void DisplayDetectionMenu();
-
-namespace image{
-
-
- /**
- * @brief Presents inference results using the data presentation
- * object.
- * @param[in] platform Reference to the hal platform object.
- * @param[in] results Vector of detection results to be displayed.
- * @return true if successful, false otherwise.
- **/
- bool PresentInferenceResult(hal_platform & platform,
- const std::vector < arm::app::DetectionResult > & results);
-
-
- /**
- * @brief Presents inference results along with the inference time using the data presentation
- * object.
- * @param[in] platform Reference to the hal platform object.
- * @param[in] results Vector of detection results to be displayed.
- * @param[in] infTimeMs Inference time in ms.
- * @return true if successful, false otherwise.
- **/
- bool PresentInferenceResult(hal_platform & platform,
- const std::vector < arm::app::DetectionResult > & results,
- const time_t infTimeMs);
-
- /**
- * @brief Presents inference results along with the inference time using the data presentation
- * object.
- * @param[in] platform Reference to the hal platform object.
- * @param[in] results Vector of detection results to be displayed.
- * @param[in] infTimeMs Inference time in ms.
- * @return true if successful, false otherwise.
- **/
- bool PresentInferenceResult(hal_platform & platform,
- const std::vector < arm::app::DetectionResult > & results,
- bool profilingEnabled,
- const time_t infTimeMs = 0);
- }
-
-
-
-
-#endif /* DETECTION_USE_CASE_UTILS_HPP */
diff --git a/source/use_case/object_detection/include/DetectorPostProcessing.hpp b/source/use_case/object_detection/include/DetectorPostProcessing.hpp
index 9a8549c..3e9c819 100644
--- a/source/use_case/object_detection/include/DetectorPostProcessing.hpp
+++ b/source/use_case/object_detection/include/DetectorPostProcessing.hpp
@@ -1,55 +1,194 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#ifndef DETECTOR_POST_PROCESSING_HPP
-#define DETECTOR_POST_PROCESSING_HPP
-
-#include "UseCaseCommonUtils.hpp"
-#include "DetectionResult.hpp"
-
-namespace arm {
-namespace app {
-
-#if DISPLAY_RGB_IMAGE
-#define FORMAT_MULTIPLY_FACTOR 3
-#else
-#define FORMAT_MULTIPLY_FACTOR 1
-#endif /* DISPLAY_RGB_IMAGE */
-
- /**
- * @brief Post processing part of Yolo object detection CNN
- * @param[in] img_in Pointer to the input image,detection bounding boxes drown on it.
- * @param[in] model_output Output tesnsors after CNN invoked
- * @param[out] results_out Vector of detected results.
- * @return void
- **/
-void RunPostProcessing(uint8_t *img_in,TfLiteTensor* model_output[2],std::vector<arm::app::DetectionResult> & results_out);
-
-
- /**
- * @brief Converts RGB image to grayscale
- * @param[in] rgb Pointer to RGB input image
- * @param[out] gray Pointer to RGB out image
- * @param[in] im_w Input image width
- * @param[in] im_h Input image height
- * @return void
- **/
-void RgbToGrayscale(const uint8_t *rgb,uint8_t *gray, int im_w,int im_h);
-
-} /* namespace app */
-} /* namespace arm */
-
-#endif /* DETECTOR_POST_PROCESSING_HPP */
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef DETECTOR_POST_PROCESSING_HPP
+#define DETECTOR_POST_PROCESSING_HPP
+
+#include "UseCaseCommonUtils.hpp"
+#include "DetectionResult.hpp"
+#include "YoloFastestModel.hpp"
+
+#include <forward_list>
+
+namespace arm {
+namespace app {
+namespace object_detection {
+
+ struct Branch {
+ int resolution;
+ int numBox;
+ const float* anchor;
+ int8_t* modelOutput;
+ float scale;
+ int zeroPoint;
+ size_t size;
+ };
+
+ struct Network {
+ int inputWidth;
+ int inputHeight;
+ int numClasses;
+ std::vector<Branch> branches;
+ int topN;
+ };
+
+
+ struct Box {
+ float x;
+ float y;
+ float w;
+ float h;
+ };
+
+ struct Detection {
+ Box bbox;
+ std::vector<float> prob;
+ float objectness;
+ };
+
+ /**
+ * @brief Helper class to manage tensor post-processing for "object_detection"
+ * output.
+ */
+ class DetectorPostprocessing {
+ public:
+ /**
+ * @brief Constructor.
+ * @param[in] threshold Post-processing threshold.
+ * @param[in] nms Non-maximum Suppression threshold.
+ * @param[in] numClasses Number of classes.
+ * @param[in] topN Top N for each class.
+ **/
+ DetectorPostprocessing(float threshold = 0.5f,
+ float nms = 0.45f,
+ int numClasses = 1,
+ int topN = 0);
+
+ /**
+ * @brief Post processing part of Yolo object detection CNN.
+ * @param[in] imgIn Pointer to the input image; detection bounding boxes are drawn on it.
+ * @param[in] imgRows Number of rows in the input image.
+ * @param[in] imgCols Number of columns in the input image.
+ * @param[in] modelOutput Output tensors after CNN invoked.
+ * @param[out] resultsOut Vector of detected results.
+ **/
+ void RunPostProcessing(uint8_t* imgIn,
+ uint32_t imgRows,
+ uint32_t imgCols,
+ TfLiteTensor* modelOutput0,
+ TfLiteTensor* modelOutput1,
+ std::vector<DetectionResult>& resultsOut);
+
+ private:
+ float m_threshold; /* Post-processing threshold */
+ float m_nms; /* NMS threshold */
+ int m_numClasses; /* Number of classes */
+ int m_topN; /* TopN */
+
+ /**
+ * @brief Calculate the Sigmoid function of the given value.
+ * @param[in] x Value.
+ * @return Sigmoid value of the input.
+ **/
+ float Sigmoid(float x);
+
+ /**
+ * @brief Insert the given Detection in the list.
+ * @param[in] detections List of detections.
+ * @param[in] det Detection to be inserted.
+ **/
+ void InsertTopNDetections(std::forward_list<Detection>& detections, Detection& det);
+
+ /**
+ * @brief Given a network, calculate the detection boxes.
+ * @param[in] net Network.
+ * @param[in] imageWidth Original image width.
+ * @param[in] imageHeight Original image height.
+ * @param[in] threshold Detections threshold.
+ * @param[out] detections Detection boxes.
+ **/
+ void GetNetworkBoxes(Network& net,
+ int imageWidth,
+ int imageHeight,
+ float threshold,
+ std::forward_list<Detection>& detections);
+
+ /**
+ * @brief Calculate the 1D overlap.
+ * @param[in] x1Center First center point.
+ * @param[in] width1 First width.
+ * @param[in] x2Center Second center point.
+ * @param[in] width2 Second width.
+ * @return The overlap between the two lines.
+ **/
+ float Calculate1DOverlap(float x1Center, float width1, float x2Center, float width2);
+
+ /**
+ * @brief Calculate the intersection between the two given boxes.
+ * @param[in] box1 First box.
+ * @param[in] box2 Second box.
+ * @return The intersection value.
+ **/
+ float CalculateBoxIntersect(Box& box1, Box& box2);
+
+ /**
+ * @brief Calculate the union between the two given boxes.
+ * @param[in] box1 First box.
+ * @param[in] box2 Second box.
+ * @return The two given boxes union value.
+ **/
+ float CalculateBoxUnion(Box& box1, Box& box2);
+ /**
+ * @brief Calculate the intersection over union between the two given boxes.
+ * @param[in] box1 First box.
+ * @param[in] box2 Second box.
+ * @return The intersection over union value.
+ **/
+ float CalculateBoxIOU(Box& box1, Box& box2);
+
+ /**
+ * @brief Apply Non-Maximum Suppression (NMS) to the given detection boxes.
+ * @param[in] detections Detection boxes.
+ * @param[in] classes Number of classes.
+ * @param[in] iouThreshold Intersection over union threshold.
+ **/
+ void CalculateNMS(std::forward_list<Detection>& detections, int classes, float iouThreshold);
+
+ /**
+ * @brief Draw on the given image a bounding box starting at (boxX, boxY).
+ * @param[in/out] imgIn Image.
+ * @param[in] imWidth Image width.
+ * @param[in] imHeight Image height.
+ * @param[in] boxX Axis X starting point.
+ * @param[in] boxY Axis Y starting point.
+ * @param[in] boxWidth Box width.
+ * @param[in] boxHeight Box height.
+ **/
+ void DrawBoxOnImage(uint8_t* imgIn,
+ int imWidth,
+ int imHeight,
+ int boxX,
+ int boxY,
+ int boxWidth,
+ int boxHeight);
+ };
+
+} /* namespace object_detection */
+} /* namespace app */
+} /* namespace arm */
+
+#endif /* DETECTOR_POST_PROCESSING_HPP */
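The new class replaces the free RunPostProcessing function. A minimal caller sketch under stated assumptions (an arm::app::Model wrapper exposing GetOutputTensor, as used elsewhere in this patch, and a caller-owned image buffer):

#include "DetectorPostProcessing.hpp"

#include <cstdint>
#include <vector>

static void RunDetector(arm::app::Model& model, uint8_t* imageBuffer,
                        uint32_t nRows, uint32_t nCols)
{
    /* Constructor defaults: threshold 0.5, NMS 0.45, 1 class, topN 0. */
    arm::app::object_detection::DetectorPostprocessing postp;

    std::vector<arm::app::object_detection::DetectionResult> results;
    postp.RunPostProcessing(imageBuffer, nRows, nCols,
                            model.GetOutputTensor(0),  /* cols/32 grid branch */
                            model.GetOutputTensor(1),  /* cols/16 grid branch */
                            results);
    /* results holds one entry per surviving box; the boxes have also been
     * drawn directly onto imageBuffer. */
}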
diff --git a/source/use_case/object_detection/include/YoloFastestModel.hpp b/source/use_case/object_detection/include/YoloFastestModel.hpp
index f5709ea..2986a58 100644
--- a/source/use_case/object_detection/include/YoloFastestModel.hpp
+++ b/source/use_case/object_detection/include/YoloFastestModel.hpp
@@ -19,6 +19,11 @@
#include "Model.hpp"
+extern const int originalImageSize;
+extern const int channelsImageDisplayed;
+extern const float anchor1[];
+extern const float anchor2[];
+
namespace arm {
namespace app {
diff --git a/source/use_case/object_detection/src/DetectionUseCaseUtils.cc b/source/use_case/object_detection/src/DetectionUseCaseUtils.cc
deleted file mode 100644
index 1713c7e..0000000
--- a/source/use_case/object_detection/src/DetectionUseCaseUtils.cc
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "DetectionUseCaseUtils.hpp"
-#include "UseCaseCommonUtils.hpp"
-#include "InputFiles.hpp"
-#include <inttypes.h>
-
-
-void DisplayDetectionMenu()
-{
- printf("\n\n");
- printf("User input required\n");
- printf("Enter option number from:\n\n");
- printf(" %u. Run detection on next ifm\n", common::MENU_OPT_RUN_INF_NEXT);
- printf(" %u. Run detection ifm at chosen index\n", common::MENU_OPT_RUN_INF_CHOSEN);
- printf(" %u. Run detection on all ifm\n", common::MENU_OPT_RUN_INF_ALL);
- printf(" %u. Show NN model info\n", common::MENU_OPT_SHOW_MODEL_INFO);
- printf(" %u. List ifm\n\n", common::MENU_OPT_LIST_IFM);
- printf(" Choice: ");
- fflush(stdout);
-}
-
-
-bool image::PresentInferenceResult(hal_platform& platform,
- const std::vector<arm::app::DetectionResult>& results)
-{
- return PresentInferenceResult(platform, results, false);
-}
-
-bool image::PresentInferenceResult(hal_platform &platform,
- const std::vector<arm::app::DetectionResult> &results,
- const time_t infTimeMs)
-{
- return PresentInferenceResult(platform, results, true, infTimeMs);
-}
-
-
-bool image::PresentInferenceResult(hal_platform &platform,
- const std::vector<arm::app::DetectionResult> &results,
- bool profilingEnabled,
- const time_t infTimeMs)
-{
- constexpr uint32_t dataPsnTxtStartX1 = 150;
- constexpr uint32_t dataPsnTxtStartY1 = 30;
-
-
- if(profilingEnabled)
- {
- platform.data_psn->set_text_color(COLOR_YELLOW);
-
- /* If profiling is enabled, and the time is valid. */
- info("Final results:\n");
- info("Total number of inferences: 1\n");
- if (infTimeMs)
- {
- std::string strInf =
- std::string{"Inference: "} +
- std::to_string(infTimeMs) +
- std::string{"ms"};
- platform.data_psn->present_data_text(
- strInf.c_str(), strInf.size(),
- dataPsnTxtStartX1, dataPsnTxtStartY1, 0);
- }
- }
- platform.data_psn->set_text_color(COLOR_GREEN);
-
- if(!profilingEnabled) {
- info("Final results:\n");
- info("Total number of inferences: 1\n");
- }
-
- for (uint32_t i = 0; i < results.size(); ++i) {
-
- if(profilingEnabled) {
- info("%" PRIu32 ") (%f) -> %s {x=%d,y=%d,w=%d,h=%d}\n", i,
- results[i].m_normalisedVal, "Detection box:",
- results[i].m_x0, results[i].m_y0, results[i].m_w, results[i].m_h );
- }
- else
- {
- info("%" PRIu32 ") (%f) -> %s {x=%d,y=%d,w=%d,h=%d}\n", i,
- results[i].m_normalisedVal, "Detection box:",
- results[i].m_x0, results[i].m_y0, results[i].m_w, results[i].m_h );
- }
- }
-
- return true;
-}
-
-
-
diff --git a/source/use_case/object_detection/src/DetectorPostProcessing.cc b/source/use_case/object_detection/src/DetectorPostProcessing.cc
index e781b62..edfb137 100755..100644
--- a/source/use_case/object_detection/src/DetectorPostProcessing.cc
+++ b/source/use_case/object_detection/src/DetectorPostProcessing.cc
@@ -1,447 +1,348 @@
-/*
- * Copyright (c) 2022 Arm Limited. All rights reserved.
- * SPDX-License-Identifier: Apache-2.0
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include "DetectorPostProcessing.hpp"
-#include <algorithm>
-#include <cmath>
-#include <stdint.h>
-#include <forward_list>
-
-
-typedef struct boxabs {
- float left, right, top, bot;
-} boxabs;
-
-
-typedef struct branch {
- int resolution;
- int num_box;
- float *anchor;
- int8_t *tf_output;
- float scale;
- int zero_point;
- size_t size;
- float scale_x_y;
-} branch;
-
-typedef struct network {
- int input_w;
- int input_h;
- int num_classes;
- int num_branch;
- branch *branchs;
- int topN;
-} network;
-
-
-typedef struct box {
- float x, y, w, h;
-} box;
-
-typedef struct detection{
- box bbox;
- float *prob;
- float objectness;
-} detection;
-
-
-
-static int sort_class;
-
-static void free_dets(std::forward_list<detection> &dets){
- std::forward_list<detection>::iterator it;
- for ( it = dets.begin(); it != dets.end(); ++it ){
- free(it->prob);
- }
-}
-
-float sigmoid(float x)
-{
- return 1.f/(1.f + exp(-x));
-}
-
-static bool det_objectness_comparator(detection &pa, detection &pb)
-{
- return pa.objectness < pb.objectness;
-}
-
-static void insert_topN_det(std::forward_list<detection> &dets, detection det)
-{
- std::forward_list<detection>::iterator it;
- std::forward_list<detection>::iterator last_it;
- for ( it = dets.begin(); it != dets.end(); ++it ){
- if(it->objectness > det.objectness)
- break;
- last_it = it;
- }
- if(it != dets.begin()){
- dets.emplace_after(last_it, det);
- free(dets.begin()->prob);
- dets.pop_front();
- }
- else{
- free(det.prob);
- }
-}
-
-static std::forward_list<detection> get_network_boxes(network *net, int image_w, int image_h, float thresh, int *num)
-{
- std::forward_list<detection> dets;
- int i;
- int num_classes = net->num_classes;
- *num = 0;
-
- for (i = 0; i < net->num_branch; ++i) {
- int height = net->branchs[i].resolution;
- int width = net->branchs[i].resolution;
- int channel = net->branchs[i].num_box*(5+num_classes);
-
- for (int h = 0; h < net->branchs[i].resolution; h++) {
- for (int w = 0; w < net->branchs[i].resolution; w++) {
- for (int anc = 0; anc < net->branchs[i].num_box; anc++) {
-
- // objectness score
- int bbox_obj_offset = h * width * channel + w * channel + anc * (num_classes + 5) + 4;
- float objectness = sigmoid(((float)net->branchs[i].tf_output[bbox_obj_offset] - net->branchs[i].zero_point) * net->branchs[i].scale);
-
- if(objectness > thresh){
- detection det;
- det.prob = (float*)calloc(num_classes, sizeof(float));
- det.objectness = objectness;
- //get bbox prediction data for each anchor, each feature point
- int bbox_x_offset = bbox_obj_offset -4;
- int bbox_y_offset = bbox_x_offset + 1;
- int bbox_w_offset = bbox_x_offset + 2;
- int bbox_h_offset = bbox_x_offset + 3;
- int bbox_scores_offset = bbox_x_offset + 5;
- //int bbox_scores_step = 1;
- det.bbox.x = ((float)net->branchs[i].tf_output[bbox_x_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
- det.bbox.y = ((float)net->branchs[i].tf_output[bbox_y_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
- det.bbox.w = ((float)net->branchs[i].tf_output[bbox_w_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
- det.bbox.h = ((float)net->branchs[i].tf_output[bbox_h_offset] - net->branchs[i].zero_point) * net->branchs[i].scale;
-
-
- float bbox_x, bbox_y;
-
- // Eliminate grid sensitivity trick involved in YOLOv4
- bbox_x = sigmoid(det.bbox.x); //* net->branchs[i].scale_x_y - (net->branchs[i].scale_x_y - 1) / 2;
- bbox_y = sigmoid(det.bbox.y); //* net->branchs[i].scale_x_y - (net->branchs[i].scale_x_y - 1) / 2;
- det.bbox.x = (bbox_x + w) / width;
- det.bbox.y = (bbox_y + h) / height;
-
- det.bbox.w = exp(det.bbox.w) * net->branchs[i].anchor[anc*2] / net->input_w;
- det.bbox.h = exp(det.bbox.h) * net->branchs[i].anchor[anc*2+1] / net->input_h;
-
- for (int s = 0; s < num_classes; s++) {
- det.prob[s] = sigmoid(((float)net->branchs[i].tf_output[bbox_scores_offset + s] - net->branchs[i].zero_point) * net->branchs[i].scale)*objectness;
- det.prob[s] = (det.prob[s] > thresh) ? det.prob[s] : 0;
- }
-
- //correct_yolo_boxes
- det.bbox.x *= image_w;
- det.bbox.w *= image_w;
- det.bbox.y *= image_h;
- det.bbox.h *= image_h;
-
- if (*num < net->topN || net->topN <=0){
- dets.emplace_front(det);
- *num += 1;
- }
- else if(*num == net->topN){
- dets.sort(det_objectness_comparator);
- insert_topN_det(dets,det);
- *num += 1;
- }else{
- insert_topN_det(dets,det);
- }
- }
- }
- }
- }
- }
- if(*num > net->topN)
- *num -=1;
- return dets;
-}
-
-// init part
-
-static branch create_brach(int resolution, int num_box, float *anchor, int8_t *tf_output, size_t size, float scale, int zero_point)
-{
- branch b;
- b.resolution = resolution;
- b.num_box = num_box;
- b.anchor = anchor;
- b.tf_output = tf_output;
- b.size = size;
- b.scale = scale;
- b.zero_point = zero_point;
- return b;
-}
-
-static network creat_network(int input_w, int input_h, int num_classes, int num_branch, branch* branchs, int topN)
-{
- network net;
- net.input_w = input_w;
- net.input_h = input_h;
- net.num_classes = num_classes;
- net.num_branch = num_branch;
- net.branchs = branchs;
- net.topN = topN;
- return net;
-}
-
-// NMS part
-
-static float Calc1DOverlap(float x1_center, float width1, float x2_center, float width2)
-{
- float left_1 = x1_center - width1/2;
- float left_2 = x2_center - width2/2;
- float leftest;
- if (left_1 > left_2) {
- leftest = left_1;
- } else {
- leftest = left_2;
- }
-
- float right_1 = x1_center + width1/2;
- float right_2 = x2_center + width2/2;
- float rightest;
- if (right_1 < right_2) {
- rightest = right_1;
- } else {
- rightest = right_2;
- }
-
- return rightest - leftest;
-}
-
-
-static float CalcBoxIntersect(box box1, box box2)
-{
- float width = Calc1DOverlap(box1.x, box1.w, box2.x, box2.w);
- if (width < 0) return 0;
- float height = Calc1DOverlap(box1.y, box1.h, box2.y, box2.h);
- if (height < 0) return 0;
-
- float total_area = width*height;
- return total_area;
-}
-
-
-static float CalcBoxUnion(box box1, box box2)
-{
- float boxes_intersection = CalcBoxIntersect(box1, box2);
- float boxes_union = box1.w*box1.h + box2.w*box2.h - boxes_intersection;
- return boxes_union;
-}
-
-
-static float CalcBoxIOU(box box1, box box2)
-{
- float boxes_intersection = CalcBoxIntersect(box1, box2);
-
- if (boxes_intersection == 0) return 0;
-
- float boxes_union = CalcBoxUnion(box1, box2);
-
- if (boxes_union == 0) return 0;
-
- return boxes_intersection / boxes_union;
-}
-
-
-static bool CompareProbs(detection &prob1, detection &prob2)
-{
- return prob1.prob[sort_class] > prob2.prob[sort_class];
-}
-
-
-static void CalcNMS(std::forward_list<detection> &detections, int classes, float iou_threshold)
-{
- int k;
-
- for (k = 0; k < classes; ++k) {
- sort_class = k;
- detections.sort(CompareProbs);
-
- for (std::forward_list<detection>::iterator it=detections.begin(); it != detections.end(); ++it){
- if (it->prob[k] == 0) continue;
- for (std::forward_list<detection>::iterator itc=std::next(it, 1); itc != detections.end(); ++itc){
- if (itc->prob[k] == 0) continue;
- if (CalcBoxIOU(it->bbox, itc->bbox) > iou_threshold) {
- itc->prob[k] = 0;
- }
- }
- }
- }
-}
-
-
-static void inline check_and_fix_offset(int im_w,int im_h,int *offset)
-{
-
- if (!offset) return;
-
- if ( (*offset) >= im_w*im_h*FORMAT_MULTIPLY_FACTOR)
- (*offset) = im_w*im_h*FORMAT_MULTIPLY_FACTOR -1;
- else if ( (*offset) < 0)
- *offset =0;
-
-}
-
-
-static void DrawBoxOnImage(uint8_t *img_in,int im_w,int im_h,int bx,int by,int bw,int bh)
-{
-
- if (!img_in) {
- return;
- }
-
- int offset=0;
- for (int i=0; i < bw; i++) {
- /*draw two lines */
- for (int line=0; line < 2; line++) {
- /*top*/
- offset =(i + (by + line)*im_w + bx)*FORMAT_MULTIPLY_FACTOR;
- check_and_fix_offset(im_w,im_h,&offset);
- img_in[offset] = 0xFF; /* FORMAT_MULTIPLY_FACTOR for rgb or grayscale*/
- /*bottom*/
- offset = (i + (by + bh - line)*im_w + bx)*FORMAT_MULTIPLY_FACTOR;
- check_and_fix_offset(im_w,im_h,&offset);
- img_in[offset] = 0xFF;
- }
- }
-
- for (int i=0; i < bh; i++) {
- /*draw two lines */
- for (int line=0; line < 2; line++) {
- /*left*/
- offset = ((i + by)*im_w + bx + line)*FORMAT_MULTIPLY_FACTOR;
- check_and_fix_offset(im_w,im_h,&offset);
- img_in[offset] = 0xFF;
- /*right*/
- offset = ((i + by)*im_w + bx + bw - line)*FORMAT_MULTIPLY_FACTOR;
- check_and_fix_offset(im_w,im_h,&offset);
- img_in[offset] = 0xFF;
- }
- }
-
-}
-
-
-void arm::app::RunPostProcessing(uint8_t *img_in,TfLiteTensor* model_output[2],std::vector<arm::app::DetectionResult> & results_out)
-{
-
- TfLiteTensor* output[2] = {nullptr,nullptr};
- int input_w = INPUT_IMAGE_WIDTH;
- int input_h = INPUT_IMAGE_HEIGHT;
-
- for(int anchor=0;anchor<2;anchor++)
- {
- output[anchor] = model_output[anchor];
- }
-
- /* init postprocessing */
- int num_classes = 1;
- int num_branch = 2;
- int topN = 0;
-
- branch* branchs = (branch*)calloc(num_branch, sizeof(branch));
-
- /*NOTE: anchors are different for any given input model size, estimated during training phase */
- float anchor1[] = {38, 77, 47, 97, 61, 126};
- float anchor2[] = {14, 26, 19, 37, 28, 55 };
-
-
- branchs[0] = create_brach(INPUT_IMAGE_WIDTH/32, 3, anchor1, output[0]->data.int8, output[0]->bytes, ((TfLiteAffineQuantization*)(output[0]->quantization.params))->scale->data[0], ((TfLiteAffineQuantization*)(output[0]->quantization.params))->zero_point->data[0]);
-
- branchs[1] = create_brach(INPUT_IMAGE_WIDTH/16, 3, anchor2, output[1]->data.int8, output[1]->bytes, ((TfLiteAffineQuantization*)(output[1]->quantization.params))->scale->data[0],((TfLiteAffineQuantization*)(output[1]->quantization.params))->zero_point->data[0]);
-
- network net = creat_network(input_w, input_h, num_classes, num_branch, branchs,topN);
- /* end init */
-
- /* start postprocessing */
- int nboxes=0;
- float thresh = .5;//50%
- float nms = .45;
- int orig_image_width = ORIGINAL_IMAGE_WIDTH;
- int orig_image_height = ORIGINAL_IMAGE_HEIGHT;
- std::forward_list<detection> dets = get_network_boxes(&net, orig_image_width, orig_image_height, thresh, &nboxes);
- /* do nms */
- CalcNMS(dets, net.num_classes, nms);
- uint8_t temp_unsuppressed_counter = 0;
- int j;
- for (std::forward_list<detection>::iterator it=dets.begin(); it != dets.end(); ++it){
- float xmin = it->bbox.x - it->bbox.w / 2.0f;
- float xmax = it->bbox.x + it->bbox.w / 2.0f;
- float ymin = it->bbox.y - it->bbox.h / 2.0f;
- float ymax = it->bbox.y + it->bbox.h / 2.0f;
-
- if (xmin < 0) xmin = 0;
- if (ymin < 0) ymin = 0;
- if (xmax > orig_image_width) xmax = orig_image_width;
- if (ymax > orig_image_height) ymax = orig_image_height;
-
- float bx = xmin;
- float by = ymin;
- float bw = xmax - xmin;
- float bh = ymax - ymin;
-
- for (j = 0; j < net.num_classes; ++j) {
- if (it->prob[j] > 0) {
-
- arm::app::DetectionResult tmp_result = {};
-
- tmp_result.m_normalisedVal = it->prob[j];
- tmp_result.m_x0=bx;
- tmp_result.m_y0=by;
- tmp_result.m_w=bw;
- tmp_result.m_h=bh;
-
- results_out.push_back(tmp_result);
-
- DrawBoxOnImage(img_in,orig_image_width,orig_image_height,bx,by,bw,bh);
-
- temp_unsuppressed_counter++;
- }
- }
- }
-
- free_dets(dets);
- free(branchs);
-
-}
-
-void arm::app::RgbToGrayscale(const uint8_t *rgb,uint8_t *gray, int im_w,int im_h)
-{
- float R=0.299;
- float G=0.587;
- float B=0.114;
- for (int i=0; i< im_w*im_h; i++ ) {
-
- uint32_t int_gray = rgb[i*3 + 0]*R + rgb[i*3 + 1]*G+ rgb[i*3 + 2]*B;
- /*clip if need */
- if (int_gray <= UINT8_MAX) {
- gray[i] = int_gray;
- } else {
- gray[i] = UINT8_MAX;
- }
-
- }
-
-}
-
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DetectorPostProcessing.hpp"
+
+#include <algorithm>
+#include <cmath>
+
+namespace arm {
+namespace app {
+namespace object_detection {
+
+DetectorPostprocessing::DetectorPostprocessing(
+ const float threshold,
+ const float nms,
+ int numClasses,
+ int topN)
+ : m_threshold(threshold),
+ m_nms(nms),
+ m_numClasses(numClasses),
+ m_topN(topN)
+{}
+
+void DetectorPostprocessing::RunPostProcessing(
+ uint8_t* imgIn,
+ uint32_t imgRows,
+ uint32_t imgCols,
+ TfLiteTensor* modelOutput0,
+ TfLiteTensor* modelOutput1,
+ std::vector<DetectionResult>& resultsOut)
+{
+ /* init postprocessing */
+ Network net {
+ .inputWidth = static_cast<int>(imgCols),
+ .inputHeight = static_cast<int>(imgRows),
+ .numClasses = m_numClasses,
+ .branches = {
+ Branch {
+ .resolution = static_cast<int>(imgCols/32),
+ .numBox = 3,
+ .anchor = anchor1,
+ .modelOutput = modelOutput0->data.int8,
+ .scale = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->scale->data[0],
+ .zeroPoint = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->zero_point->data[0],
+ .size = modelOutput0->bytes
+ },
+ Branch {
+ .resolution = static_cast<int>(imgCols/16),
+ .numBox = 3,
+ .anchor = anchor2,
+ .modelOutput = modelOutput1->data.int8,
+ .scale = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->scale->data[0],
+ .zeroPoint = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->zero_point->data[0],
+ .size = modelOutput1->bytes
+ }
+ },
+ .topN = m_topN
+ };
+ /* End init */
+
+ /* Start postprocessing */
+ int originalImageWidth = originalImageSize;
+ int originalImageHeight = originalImageSize;
+
+ std::forward_list<Detection> detections;
+ GetNetworkBoxes(net, originalImageWidth, originalImageHeight, m_threshold, detections);
+
+ /* Do nms */
+ CalculateNMS(detections, net.numClasses, m_nms);
+
+ for (auto& it: detections) {
+ float xMin = it.bbox.x - it.bbox.w / 2.0f;
+ float xMax = it.bbox.x + it.bbox.w / 2.0f;
+ float yMin = it.bbox.y - it.bbox.h / 2.0f;
+ float yMax = it.bbox.y + it.bbox.h / 2.0f;
+
+ if (xMin < 0) {
+ xMin = 0;
+ }
+ if (yMin < 0) {
+ yMin = 0;
+ }
+ if (xMax > originalImageWidth) {
+ xMax = originalImageWidth;
+ }
+ if (yMax > originalImageHeight) {
+ yMax = originalImageHeight;
+ }
+
+ float boxX = xMin;
+ float boxY = yMin;
+ float boxWidth = xMax - xMin;
+ float boxHeight = yMax - yMin;
+
+ for (int j = 0; j < net.numClasses; ++j) {
+ if (it.prob[j] > 0) {
+
+ DetectionResult tmpResult = {};
+ tmpResult.m_normalisedVal = it.prob[j];
+ tmpResult.m_x0 = boxX;
+ tmpResult.m_y0 = boxY;
+ tmpResult.m_w = boxWidth;
+ tmpResult.m_h = boxHeight;
+
+ resultsOut.push_back(tmpResult);
+
+ /* TODO: Instead of drawing on the image, return the boxes and draw on the LCD */
+ DrawBoxOnImage(imgIn, originalImageWidth, originalImageHeight, boxX, boxY, boxWidth, boxHeight);
+ }
+ }
+ }
+}
+
+float DetectorPostprocessing::Sigmoid(float x)
+{
+ return 1.f/(1.f + exp(-x));
+}
+
+void DetectorPostprocessing::InsertTopNDetections(std::forward_list<Detection>& detections, Detection& det)
+{
+ std::forward_list<Detection>::iterator it;
+ std::forward_list<Detection>::iterator last_it;
+ for ( it = detections.begin(); it != detections.end(); ++it ) {
+ if(it->objectness > det.objectness)
+ break;
+ last_it = it;
+ }
+ if(it != detections.begin()) {
+ detections.emplace_after(last_it, det);
+ detections.pop_front();
+ }
+}
+
+void DetectorPostprocessing::GetNetworkBoxes(Network& net, int imageWidth, int imageHeight, float threshold, std::forward_list<Detection>& detections)
+{
+ int numClasses = net.numClasses;
+ int num = 0;
+ auto det_objectness_comparator = [](Detection& pa, Detection& pb) {
+ return pa.objectness < pb.objectness;
+ };
+ for (size_t i = 0; i < net.branches.size(); ++i) {
+ int height = net.branches[i].resolution;
+ int width = net.branches[i].resolution;
+ int channel = net.branches[i].numBox*(5+numClasses);
+
+ for (int h = 0; h < net.branches[i].resolution; h++) {
+ for (int w = 0; w < net.branches[i].resolution; w++) {
+ for (int anc = 0; anc < net.branches[i].numBox; anc++) {
+
+ /* Objectness score */
+ int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
+ float objectness = Sigmoid(((float)net.branches[i].modelOutput[bbox_obj_offset] - net.branches[i].zeroPoint) * net.branches[i].scale);
+
+ if(objectness > threshold) {
+ Detection det;
+ det.objectness = objectness;
+ /* Get bbox prediction data for each anchor, each feature point */
+ int bbox_x_offset = bbox_obj_offset -4;
+ int bbox_y_offset = bbox_x_offset + 1;
+ int bbox_w_offset = bbox_x_offset + 2;
+ int bbox_h_offset = bbox_x_offset + 3;
+ int bbox_scores_offset = bbox_x_offset + 5;
+
+ det.bbox.x = ((float)net.branches[i].modelOutput[bbox_x_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.y = ((float)net.branches[i].modelOutput[bbox_y_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.w = ((float)net.branches[i].modelOutput[bbox_w_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.h = ((float)net.branches[i].modelOutput[bbox_h_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
+
+
+ float bbox_x, bbox_y;
+
+ /* Eliminate grid sensitivity trick involved in YOLOv4 */
+ bbox_x = Sigmoid(det.bbox.x);
+ bbox_y = Sigmoid(det.bbox.y);
+ det.bbox.x = (bbox_x + w) / width;
+ det.bbox.y = (bbox_y + h) / height;
+
+ det.bbox.w = exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
+ det.bbox.h = exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
+
+ for (int s = 0; s < numClasses; s++) {
+ float sig = Sigmoid(((float)net.branches[i].modelOutput[bbox_scores_offset + s] - net.branches[i].zeroPoint) * net.branches[i].scale)*objectness;
+ det.prob.emplace_back((sig > threshold) ? sig : 0);
+ }
+
+ /* Correct_YOLO_boxes */
+ det.bbox.x *= imageWidth;
+ det.bbox.w *= imageWidth;
+ det.bbox.y *= imageHeight;
+ det.bbox.h *= imageHeight;
+
+ if (num < net.topN || net.topN <=0) {
+ detections.emplace_front(det);
+ num += 1;
+ } else if (num == net.topN) {
+ detections.sort(det_objectness_comparator);
+ InsertTopNDetections(detections,det);
+ num += 1;
+ } else {
+ InsertTopNDetections(detections,det);
+ }
+ }
+ }
+ }
+ }
+ }
+ if(num > net.topN)
+ num -=1;
+}
+
+float DetectorPostprocessing::Calculate1DOverlap(float x1Center, float width1, float x2Center, float width2)
+{
+ float left_1 = x1Center - width1/2;
+ float left_2 = x2Center - width2/2;
+ float leftest = left_1 > left_2 ? left_1 : left_2;
+
+ float right_1 = x1Center + width1/2;
+ float right_2 = x2Center + width2/2;
+ float rightest = right_1 < right_2 ? right_1 : right_2;
+
+ return rightest - leftest;
+}
+
+float DetectorPostprocessing::CalculateBoxIntersect(Box& box1, Box& box2)
+{
+ float width = Calculate1DOverlap(box1.x, box1.w, box2.x, box2.w);
+ if (width < 0) {
+ return 0;
+ }
+ float height = Calculate1DOverlap(box1.y, box1.h, box2.y, box2.h);
+ if (height < 0) {
+ return 0;
+ }
+
+ float total_area = width*height;
+ return total_area;
+}
+
+float DetectorPostprocessing::CalculateBoxUnion(Box& box1, Box& box2)
+{
+ float boxes_intersection = CalculateBoxIntersect(box1, box2);
+ float boxes_union = box1.w * box1.h + box2.w * box2.h - boxes_intersection;
+ return boxes_union;
+}
+
+
+float DetectorPostprocessing::CalculateBoxIOU(Box& box1, Box& box2)
+{
+ float boxes_intersection = CalculateBoxIntersect(box1, box2);
+ if (boxes_intersection == 0) {
+ return 0;
+ }
+
+ float boxes_union = CalculateBoxUnion(box1, box2);
+ if (boxes_union == 0) {
+ return 0;
+ }
+
+ return boxes_intersection / boxes_union;
+}
+
+void DetectorPostprocessing::CalculateNMS(std::forward_list<Detection>& detections, int classes, float iouThreshold)
+{
+ int idxClass{0};
+ auto CompareProbs = [idxClass](Detection& prob1, Detection& prob2) {
+ return prob1.prob[idxClass] > prob2.prob[idxClass];
+ };
+
+ for (idxClass = 0; idxClass < classes; ++idxClass) {
+ detections.sort(CompareProbs);
+
+ for (std::forward_list<Detection>::iterator it=detections.begin(); it != detections.end(); ++it) {
+ if (it->prob[idxClass] == 0) continue;
+ for (std::forward_list<Detection>::iterator itc=std::next(it, 1); itc != detections.end(); ++itc) {
+ if (itc->prob[idxClass] == 0) {
+ continue;
+ }
+ if (CalculateBoxIOU(it->bbox, itc->bbox) > iouThreshold) {
+ itc->prob[idxClass] = 0;
+ }
+ }
+ }
+ }
+}
+
+void DetectorPostprocessing::DrawBoxOnImage(uint8_t* imgIn, int imWidth, int imHeight, int boxX,int boxY, int boxWidth, int boxHeight)
+{
+ auto CheckAndFixOffset = [](int im_width,int im_height,int& offset) {
+ if ( (offset) >= im_width*im_height*channelsImageDisplayed) {
+ offset = im_width * im_height * channelsImageDisplayed -1;
+ }
+ else if ( (offset) < 0) {
+ offset = 0;
+ }
+ };
+
+ /* Consistency checks */
+ if (!imgIn) {
+ return;
+ }
+
+ int offset=0;
+ for (int i=0; i < boxWidth; i++) {
+ /* Draw two horizontal lines */
+ for (int line=0; line < 2; line++) {
+ /*top*/
+ offset =(i + (boxY + line)*imWidth + boxX) * channelsImageDisplayed; /* channelsImageDisplayed for rgb or grayscale*/
+ CheckAndFixOffset(imWidth,imHeight,offset);
+ imgIn[offset] = 0xFF;
+ /*bottom*/
+ offset = (i + (boxY + boxHeight - line)*imWidth + boxX) * channelsImageDisplayed;
+ CheckAndFixOffset(imWidth,imHeight,offset);
+ imgIn[offset] = 0xFF;
+ }
+ }
+
+ for (int i=0; i < boxHeight; i++) {
+ /* Draw two vertical lines */
+ for (int line=0; line < 2; line++) {
+ /*left*/
+ offset = ((i + boxY)*imWidth + boxX + line)*channelsImageDisplayed;
+ CheckAndFixOffset(imWidth,imHeight,offset);
+ imgIn[offset] = 0xFF;
+ /*right*/
+ offset = ((i + boxY)*imWidth + boxX + boxWidth - line)*channelsImageDisplayed;
+ CheckAndFixOffset(imWidth,imHeight, offset);
+ imgIn[offset] = 0xFF;
+ }
+ }
+
+}
+
+} /* namespace object_detection */
+} /* namespace app */
+} /* namespace arm */
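To sanity-check the NMS arithmetic above, a self-contained restatement of the overlap/intersection/union chain (standalone example, not patch code; boxes are centre-based x, y, w, h):

#include <algorithm>
#include <cstdio>

struct Box { float x, y, w, h; };

/* Overlap of two 1D segments given their centres and widths;
 * negative when they do not overlap. */
static float Overlap1D(float c1, float w1, float c2, float w2)
{
    float left  = std::max(c1 - w1 / 2, c2 - w2 / 2);
    float right = std::min(c1 + w1 / 2, c2 + w2 / 2);
    return right - left;
}

int main()
{
    Box a{50, 50, 20, 20};   /* spans [40,60] x [40,60] */
    Box b{55, 55, 20, 20};   /* spans [45,65] x [45,65] */
    float w = std::max(0.f, Overlap1D(a.x, a.w, b.x, b.w));  /* 15 */
    float h = std::max(0.f, Overlap1D(a.y, a.h, b.y, b.h));  /* 15 */
    float inter = w * h;                                     /* 225 */
    float uni   = a.w * a.h + b.w * b.h - inter;             /* 575 */
    printf("IoU = %f\n", inter / uni);                       /* ~0.391 */
    return 0;
}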
diff --git a/source/use_case/object_detection/src/MainLoop.cc b/source/use_case/object_detection/src/MainLoop.cc
index b0fbf96..d8fc7f5 100644
--- a/source/use_case/object_detection/src/MainLoop.cc
+++ b/source/use_case/object_detection/src/MainLoop.cc
@@ -14,14 +14,28 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-#include "hal.h" /* Brings in platform definitions. */
-#include "InputFiles.hpp" /* For input images. */
-#include "YoloFastestModel.hpp" /* Model class for running inference. */
-#include "UseCaseHandler.hpp" /* Handlers for different user options. */
-#include "UseCaseCommonUtils.hpp" /* Utils functions. */
-#include "DetectionUseCaseUtils.hpp" /* Utils functions specific to object detection. */
+#include "hal.h" /* Brings in platform definitions. */
+#include "InputFiles.hpp" /* For input images. */
+#include "YoloFastestModel.hpp" /* Model class for running inference. */
+#include "UseCaseHandler.hpp" /* Handlers for different user options. */
+#include "UseCaseCommonUtils.hpp" /* Utils functions. */
+#include "DetectorPostProcessing.hpp" /* Post-processing class. */
+static void DisplayDetectionMenu()
+{
+ printf("\n\n");
+ printf("User input required\n");
+ printf("Enter option number from:\n\n");
+ printf(" %u. Run detection on next ifm\n", common::MENU_OPT_RUN_INF_NEXT);
+ printf(" %u. Run detection ifm at chosen index\n", common::MENU_OPT_RUN_INF_CHOSEN);
+ printf(" %u. Run detection on all ifm\n", common::MENU_OPT_RUN_INF_ALL);
+ printf(" %u. Show NN model info\n", common::MENU_OPT_SHOW_MODEL_INFO);
+ printf(" %u. List ifm\n\n", common::MENU_OPT_LIST_IFM);
+ printf(" Choice: ");
+ fflush(stdout);
+}
+
void main_loop(hal_platform& platform)
{
arm::app::YoloFastestModel model; /* Model wrapper object. */
@@ -40,8 +54,10 @@ void main_loop(hal_platform& platform)
caseContext.Set<hal_platform&>("platform", platform);
caseContext.Set<arm::app::Model&>("model", model);
caseContext.Set<uint32_t>("imgIndex", 0);
+ arm::app::object_detection::DetectorPostprocessing postp;
+ caseContext.Set<arm::app::object_detection::DetectorPostprocessing&>("postprocess", postp);
+
-
/* Loop. */
bool executionSuccessful = true;
constexpr bool bUseMenu = NUMBER_OF_FILES > 1 ? true : false;
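The Set in main_loop is paired with a Get in the handler (next file); both sides must use the identical reference type and key string, restated here:

arm::app::object_detection::DetectorPostprocessing postp;
caseContext.Set<arm::app::object_detection::DetectorPostprocessing&>("postprocess", postp);

/* Later, inside ObjectDetectionHandler (UseCaseHandler.cc): */
auto& postprocess = ctx.Get<arm::app::object_detection::DetectorPostprocessing&>("postprocess");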
diff --git a/source/use_case/object_detection/src/UseCaseHandler.cc b/source/use_case/object_detection/src/UseCaseHandler.cc
index 45df4f8..ce3ef06 100644
--- a/source/use_case/object_detection/src/UseCaseHandler.cc
+++ b/source/use_case/object_detection/src/UseCaseHandler.cc
@@ -18,19 +18,23 @@
#include "InputFiles.hpp"
#include "YoloFastestModel.hpp"
#include "UseCaseCommonUtils.hpp"
-#include "DetectionUseCaseUtils.hpp"
#include "DetectorPostProcessing.hpp"
#include "hal.h"
#include <inttypes.h>
-
-/* used for presentation, original images are read-only"*/
-static uint8_t g_image_buffer[INPUT_IMAGE_WIDTH*INPUT_IMAGE_HEIGHT*FORMAT_MULTIPLY_FACTOR] IFM_BUF_ATTRIBUTE = {};
-
namespace arm {
namespace app {
+ /**
+ * @brief Presents inference results using the data presentation
+ * object.
+ * @param[in] platform Reference to the hal platform object.
+ * @param[in] results Vector of detection results to be displayed.
+ * @return true if successful, false otherwise.
+ **/
+ static bool PresentInferenceResult(hal_platform& platform,
+ const std::vector<arm::app::object_detection::DetectionResult>& results);
/* Object detection classification handler. */
bool ObjectDetectionHandler(ApplicationContext& ctx, uint32_t imgIndex, bool runAll)
@@ -48,7 +52,7 @@ namespace app {
platform.data_psn->clear(COLOR_BLACK);
auto& model = ctx.Get<Model&>("model");
-
+
/* If the request has a valid size, set the image index. */
if (imgIndex < NUMBER_OF_FILES) {
if (!SetAppCtxIfmIdx(ctx, imgIndex, "imgIndex")) {
@@ -76,9 +80,10 @@ namespace app {
const uint32_t nCols = inputShape->data[arm::app::YoloFastestModel::ms_inputColsIdx];
const uint32_t nRows = inputShape->data[arm::app::YoloFastestModel::ms_inputRowsIdx];
- const uint32_t nPresentationChannels = FORMAT_MULTIPLY_FACTOR;
+ const uint32_t nPresentationChannels = channelsImageDisplayed;
- std::vector<DetectionResult> results;
+ /* Get pre/post-processing objects. */
+ auto& postp = ctx.Get<object_detection::DetectorPostprocessing&>("postprocess");
do {
/* Strings for presentation/logging. */
@@ -86,19 +91,23 @@ namespace app {
const uint8_t* curr_image = get_img_array(ctx.Get<uint32_t>("imgIndex"));
- /* Copy over the data and convert to gryscale */
-#if DISPLAY_RGB_IMAGE
- memcpy(g_image_buffer,curr_image, INPUT_IMAGE_WIDTH*INPUT_IMAGE_HEIGHT*FORMAT_MULTIPLY_FACTOR);
-#else
- RgbToGrayscale(curr_image,g_image_buffer,INPUT_IMAGE_WIDTH,INPUT_IMAGE_HEIGHT);
-#endif /*DISPLAY_RGB_IMAGE*/
-
- RgbToGrayscale(curr_image,inputTensor->data.uint8,INPUT_IMAGE_WIDTH,INPUT_IMAGE_HEIGHT);
-
+ /* Copy over the data and convert to grayscale */
+ auto* dstPtr = static_cast<uint8_t*>(inputTensor->data.uint8);
+ const size_t copySz = inputTensor->bytes < IMAGE_DATA_SIZE ?
+ inputTensor->bytes : IMAGE_DATA_SIZE;
+
+ /* Copy of the image used for presentation, original images are read-only */
+ std::vector<uint8_t> g_image_buffer(nCols*nRows*channelsImageDisplayed);
+ if (nPresentationChannels == 3) {
+ memcpy(g_image_buffer.data(),curr_image, nCols * nRows * channelsImageDisplayed);
+ } else {
+ image::RgbToGrayscale(curr_image, g_image_buffer.data(), nCols * nRows);
+ }
+ image::RgbToGrayscale(curr_image, dstPtr, copySz);
/* Display this image on the LCD. */
platform.data_psn->present_data_image(
- g_image_buffer,
+ g_image_buffer.data(),
nCols, nRows, nPresentationChannels,
dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
@@ -125,27 +134,27 @@ namespace app {
dataPsnTxtInfStartX, dataPsnTxtInfStartY, 0);
/* Detector post-processing*/
- TfLiteTensor* output_arr[2] = {nullptr,nullptr};
- output_arr[0] = model.GetOutputTensor(0);
- output_arr[1] = model.GetOutputTensor(1);
- RunPostProcessing(g_image_buffer,output_arr,results);
+ std::vector<object_detection::DetectionResult> results;
+ TfLiteTensor* modelOutput0 = model.GetOutputTensor(0);
+ TfLiteTensor* modelOutput1 = model.GetOutputTensor(1);
+ postp.RunPostProcessing(
+ g_image_buffer.data(),
+ nRows,
+ nCols,
+ modelOutput0,
+ modelOutput1,
+ results);
platform.data_psn->present_data_image(
- g_image_buffer,
+ g_image_buffer.data(),
nCols, nRows, nPresentationChannels,
dataPsnImgStartX, dataPsnImgStartY, dataPsnImgDownscaleFactor);
- /*Detector post-processing*/
-
-
- /* Add results to context for access outside handler. */
- ctx.Set<std::vector<DetectionResult>>("results", results);
-
#if VERIFY_TEST_OUTPUT
arm::app::DumpTensor(outputTensor);
#endif /* VERIFY_TEST_OUTPUT */
- if (!image::PresentInferenceResult(platform, results)) {
+ if (!PresentInferenceResult(platform, results)) {
return false;
}
@@ -158,5 +167,24 @@ namespace app {
return true;
}
+
+ static bool PresentInferenceResult(hal_platform& platform,
+ const std::vector<arm::app::object_detection::DetectionResult>& results)
+ {
+ platform.data_psn->set_text_color(COLOR_GREEN);
+
+ /* If profiling is enabled, and the time is valid. */
+ info("Final results:\n");
+ info("Total number of inferences: 1\n");
+
+ for (uint32_t i = 0; i < results.size(); ++i) {
+ info("%" PRIu32 ") (%f) -> %s {x=%d,y=%d,w=%d,h=%d}\n", i,
+ results[i].m_normalisedVal, "Detection box:",
+ results[i].m_x0, results[i].m_y0, results[i].m_w, results[i].m_h );
+ }
+
+ return true;
+ }
+
} /* namespace app */
} /* namespace arm */
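The clamped copy size used in the handler is equivalent to a std::min over the tensor size and the generated IMAGE_DATA_SIZE constant (restated sketch, assuming <algorithm> is available):

#include <algorithm>

/* Same value as the ternary in the handler above; never copies more than
 * either the tensor or the source image can hold. */
const size_t copySz = std::min<size_t>(inputTensor->bytes, IMAGE_DATA_SIZE);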
diff --git a/source/use_case/object_detection/usecase.cmake b/source/use_case/object_detection/usecase.cmake
index 15bf534..42c4f2c 100644
--- a/source/use_case/object_detection/usecase.cmake
+++ b/source/use_case/object_detection/usecase.cmake
@@ -22,13 +22,18 @@ USER_OPTION(${use_case}_FILE_PATH "Directory with custom image files to use, or
USER_OPTION(${use_case}_IMAGE_SIZE "Square image size in pixels. Images will be resized to this size."
192
STRING)
-
-add_compile_definitions(DISPLAY_RGB_IMAGE=1)
-add_compile_definitions(INPUT_IMAGE_WIDTH=${${use_case}_IMAGE_SIZE})
-add_compile_definitions(INPUT_IMAGE_HEIGHT=${${use_case}_IMAGE_SIZE})
-add_compile_definitions(ORIGINAL_IMAGE_WIDTH=${${use_case}_IMAGE_SIZE})
-add_compile_definitions(ORIGINAL_IMAGE_HEIGHT=${${use_case}_IMAGE_SIZE})
+USER_OPTION(${use_case}_ANCHOR_1 "First anchor array estimated during training."
+ "{38, 77, 47, 97, 61, 126}"
+ STRING)
+
+USER_OPTION(${use_case}_ANCHOR_2 "Second anchor array estimated during training."
+ "{14, 26, 19, 37, 28, 55 }"
+ STRING)
+
+USER_OPTION(${use_case}_CHANNELS_IMAGE_DISPLAYED "Number of channels of the image displayed on the LCD: 1 for grayscale, 3 for RGB."
+ 3
+ BOOL)
# Generate input files
generate_images_code("${${use_case}_FILE_PATH}"
@@ -36,7 +41,6 @@ generate_images_code("${${use_case}_FILE_PATH}"
${INC_GEN_DIR}
"${${use_case}_IMAGE_SIZE}")
-
USER_OPTION(${use_case}_ACTIVATION_BUF_SZ "Activation buffer size for the chosen model"
0x00082000
STRING)
@@ -47,6 +51,19 @@ else()
set(DEFAULT_MODEL_PATH ${DEFAULT_MODEL_DIR}/yolo-fastest_192_face_v4.tflite)
endif()
+set(${use_case}_ORIGINAL_IMAGE_SIZE
+ ${${use_case}_IMAGE_SIZE}
+ CACHE STRING
+ "Original image size - for the post processing step to upscale the box co-ordinates.")
+
+set(EXTRA_MODEL_CODE
+ "extern const int originalImageSize = ${${use_case}_ORIGINAL_IMAGE_SIZE};"
+ "extern const int channelsImageDisplayed = ${${use_case}_CHANNELS_IMAGE_DISPLAYED};"
+ "/* NOTE: anchors are different for any given input model size, estimated during training phase */"
+ "extern const float anchor1[] = ${${use_case}_ANCHOR_1};"
+ "extern const float anchor2[] = ${${use_case}_ANCHOR_2};"
+ )
+
USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the evaluation application. Model files must be in tflite format."
${DEFAULT_MODEL_PATH}
FILEPATH
@@ -56,4 +73,5 @@ USER_OPTION(${use_case}_MODEL_TFLITE_PATH "NN models file to be used in the eval
generate_tflite_code(
MODEL_PATH ${${use_case}_MODEL_TFLITE_PATH}
DESTINATION ${SRC_GEN_DIR}
+ EXPRESSIONS ${EXTRA_MODEL_CODE}
)
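With the default options above, the EXPRESSIONS list is expected to land in the generated model source roughly as follows (assumed rendering of EXTRA_MODEL_CODE):

extern const int originalImageSize = 192;
extern const int channelsImageDisplayed = 3;
/* NOTE: anchors are different for any given input model size, estimated during training phase */
extern const float anchor1[] = {38, 77, 47, 97, 61, 126};
extern const float anchor2[] = {14, 26, 19, 37, 28, 55 };

These definitions satisfy the extern declarations added to YoloFastestModel.hpp earlier in this patch.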
diff --git a/source/use_case/vww/include/VisualWakeWordModel.hpp b/source/use_case/vww/include/VisualWakeWordModel.hpp
index ee3a7bf..1ed9202 100644
--- a/source/use_case/vww/include/VisualWakeWordModel.hpp
+++ b/source/use_case/vww/include/VisualWakeWordModel.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited. All rights reserved.
+ * Copyright (c) 2021 - 2022 Arm Limited. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
@@ -24,6 +24,12 @@ namespace app {
class VisualWakeWordModel : public Model {
+ public:
+ /* Indices for the expected model - based on input tensor shape */
+ static constexpr uint32_t ms_inputRowsIdx = 1;
+ static constexpr uint32_t ms_inputColsIdx = 2;
+ static constexpr uint32_t ms_inputChannelsIdx = 3;
+
protected:
/** @brief Gets the reference to op resolver interface class. */
const tflite::MicroOpResolver& GetOpResolver() override;
diff --git a/source/use_case/vww/src/UseCaseHandler.cc b/source/use_case/vww/src/UseCaseHandler.cc
index dbfe92b..e4dc479 100644
--- a/source/use_case/vww/src/UseCaseHandler.cc
+++ b/source/use_case/vww/src/UseCaseHandler.cc
@@ -50,8 +50,6 @@ namespace app {
constexpr uint32_t dataPsnTxtInfStartX = 150;
constexpr uint32_t dataPsnTxtInfStartY = 70;
- time_t infTimeMs = 0;
-
auto& model = ctx.Get<Model&>("model");
/* If the request has a valid size, set the image index. */
@@ -78,9 +76,13 @@ namespace app {
return false;
}
TfLiteIntArray* inputShape = model.GetInputShape(0);
- const uint32_t nCols = inputShape->data[2];
- const uint32_t nRows = inputShape->data[1];
- const uint32_t nChannels = (inputShape->size == 4) ? inputShape->data[3] : 1;
+ const uint32_t nCols = inputShape->data[arm::app::VisualWakeWordModel::ms_inputColsIdx];
+ const uint32_t nRows = inputShape->data[arm::app::VisualWakeWordModel::ms_inputRowsIdx];
+ if (arm::app::VisualWakeWordModel::ms_inputChannelsIdx >= static_cast<uint32_t>(inputShape->size)) {
+ printf_err("Invalid channel index.\n");
+ return false;
+ }
+ const uint32_t nChannels = inputShape->data[arm::app::VisualWakeWordModel::ms_inputChannelsIdx];
std::vector<ClassificationResult> results;
@@ -163,7 +165,11 @@ namespace app {
return false;
}
- const uint32_t nChannels = (inputTensor->dims->size == 4) ? inputTensor->dims->data[3] : 1;
+ if (arm::app::VisualWakeWordModel::ms_inputChannelsIdx >= static_cast<uint32_t>(inputTensor->dims->size)) {
+ printf_err("Invalid channel index.\n");
+ return false;
+ }
+ const uint32_t nChannels = inputTensor->dims->data[arm::app::VisualWakeWordModel::ms_inputChannelsIdx];
const uint8_t* srcPtr = get_img_array(imIdx);
auto* dstPtr = static_cast<uint8_t *>(inputTensor->data.data);
@@ -172,11 +178,7 @@ namespace app {
* Visual Wake Word model accepts only one channel =>
* Convert image to grayscale here
**/
- for (size_t i = 0; i < copySz; ++i, srcPtr += 3) {
- *dstPtr++ = 0.2989*(*srcPtr) +
- 0.587*(*(srcPtr+1)) +
- 0.114*(*(srcPtr+2));
- }
+ image::RgbToGrayscale(srcPtr, dstPtr, copySz);
} else {
memcpy(inputTensor->data.data, srcPtr, copySz);
}
@@ -186,4 +188,4 @@ namespace app {
}
} /* namespace app */
-} /* namespace arm */ \ No newline at end of file
+} /* namespace arm */
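Both handlers now delegate to image::RgbToGrayscale. A sketch of the expected behaviour, reconstructed from the inline loop removed above and the clipping in the old detector conversion (an assumption, not the actual library source):

#include <cstddef>
#include <cstdint>

namespace image {
    void RgbToGrayscale(const uint8_t* srcPtr, uint8_t* dstPtr, size_t dstImgSz)
    {
        for (size_t i = 0; i < dstImgSz; ++i, srcPtr += 3) {
            /* Standard luma weights, matching the removed inline loop. */
            const float gray = 0.2989f * srcPtr[0]
                             + 0.5870f * srcPtr[1]
                             + 0.1140f * srcPtr[2];
            /* Clip to the 8-bit range, as the old detector code did. */
            dstPtr[i] = (gray > 255.0f) ? 255u : static_cast<uint8_t>(gray);
        }
    }
} /* namespace image */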