summaryrefslogtreecommitdiff
path: root/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc
diff options
context:
space:
mode:
Diffstat (limited to 'source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc')
-rw-r--r--source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc240
1 files changed, 240 insertions, 0 deletions
diff --git a/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc b/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc
new file mode 100644
index 0000000..fb1606a
--- /dev/null
+++ b/source/application/api/use_case/object_detection/src/DetectorPostProcessing.cc
@@ -0,0 +1,240 @@
+/*
+ * Copyright (c) 2022 Arm Limited. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "DetectorPostProcessing.hpp"
+#include "PlatformMath.hpp"
+
+#include <cmath>
+
+namespace arm {
+namespace app {
+
+ DetectorPostProcess::DetectorPostProcess(
+ TfLiteTensor* modelOutput0,
+ TfLiteTensor* modelOutput1,
+ std::vector<object_detection::DetectionResult>& results,
+ int inputImgRows,
+ int inputImgCols,
+ const float threshold,
+ const float nms,
+ int numClasses,
+ int topN)
+ : m_outputTensor0{modelOutput0},
+ m_outputTensor1{modelOutput1},
+ m_results{results},
+ m_inputImgRows{inputImgRows},
+ m_inputImgCols{inputImgCols},
+ m_threshold(threshold),
+ m_nms(nms),
+ m_numClasses(numClasses),
+ m_topN(topN)
+{
+ /* Init PostProcessing */
+ this->m_net =
+ object_detection::Network {
+ .inputWidth = inputImgCols,
+ .inputHeight = inputImgRows,
+ .numClasses = numClasses,
+ .branches = {
+ object_detection::Branch {
+ .resolution = inputImgCols/32,
+ .numBox = 3,
+ .anchor = anchor1,
+ .modelOutput = this->m_outputTensor0->data.int8,
+ .scale = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor0->quantization.params))->scale->data[0],
+ .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor0->quantization.params))->zero_point->data[0],
+ .size = this->m_outputTensor0->bytes
+ },
+ object_detection::Branch {
+ .resolution = inputImgCols/16,
+ .numBox = 3,
+ .anchor = anchor2,
+ .modelOutput = this->m_outputTensor1->data.int8,
+ .scale = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor1->quantization.params))->scale->data[0],
+ .zeroPoint = (static_cast<TfLiteAffineQuantization*>(
+ this->m_outputTensor1->quantization.params))->zero_point->data[0],
+ .size = this->m_outputTensor1->bytes
+ }
+ },
+ .topN = m_topN
+ };
+ /* End init */
+}
+
+bool DetectorPostProcess::DoPostProcess()
+{
+ /* Start postprocessing */
+ int originalImageWidth = originalImageSize;
+ int originalImageHeight = originalImageSize;
+
+ std::forward_list<image::Detection> detections;
+ GetNetworkBoxes(this->m_net, originalImageWidth, originalImageHeight, m_threshold, detections);
+
+ /* Do nms */
+ CalculateNMS(detections, this->m_net.numClasses, m_nms);
+
+ for (auto& it: detections) {
+ float xMin = it.bbox.x - it.bbox.w / 2.0f;
+ float xMax = it.bbox.x + it.bbox.w / 2.0f;
+ float yMin = it.bbox.y - it.bbox.h / 2.0f;
+ float yMax = it.bbox.y + it.bbox.h / 2.0f;
+
+ if (xMin < 0) {
+ xMin = 0;
+ }
+ if (yMin < 0) {
+ yMin = 0;
+ }
+ if (xMax > originalImageWidth) {
+ xMax = originalImageWidth;
+ }
+ if (yMax > originalImageHeight) {
+ yMax = originalImageHeight;
+ }
+
+ float boxX = xMin;
+ float boxY = yMin;
+ float boxWidth = xMax - xMin;
+ float boxHeight = yMax - yMin;
+
+ for (int j = 0; j < this->m_net.numClasses; ++j) {
+ if (it.prob[j] > 0) {
+
+ object_detection::DetectionResult tmpResult = {};
+ tmpResult.m_normalisedVal = it.prob[j];
+ tmpResult.m_x0 = boxX;
+ tmpResult.m_y0 = boxY;
+ tmpResult.m_w = boxWidth;
+ tmpResult.m_h = boxHeight;
+
+ this->m_results.push_back(tmpResult);
+ }
+ }
+ }
+ return true;
+}
+
+void DetectorPostProcess::InsertTopNDetections(std::forward_list<image::Detection>& detections, image::Detection& det)
+{
+ std::forward_list<image::Detection>::iterator it;
+ std::forward_list<image::Detection>::iterator last_it;
+ for ( it = detections.begin(); it != detections.end(); ++it ) {
+ if(it->objectness > det.objectness)
+ break;
+ last_it = it;
+ }
+ if(it != detections.begin()) {
+ detections.emplace_after(last_it, det);
+ detections.pop_front();
+ }
+}
+
+void DetectorPostProcess::GetNetworkBoxes(
+ object_detection::Network& net,
+ int imageWidth,
+ int imageHeight,
+ float threshold,
+ std::forward_list<image::Detection>& detections)
+{
+ int numClasses = net.numClasses;
+ int num = 0;
+ auto det_objectness_comparator = [](image::Detection& pa, image::Detection& pb) {
+ return pa.objectness < pb.objectness;
+ };
+ for (size_t i = 0; i < net.branches.size(); ++i) {
+ int height = net.branches[i].resolution;
+ int width = net.branches[i].resolution;
+ int channel = net.branches[i].numBox*(5+numClasses);
+
+ for (int h = 0; h < net.branches[i].resolution; h++) {
+ for (int w = 0; w < net.branches[i].resolution; w++) {
+ for (int anc = 0; anc < net.branches[i].numBox; anc++) {
+
+ /* Objectness score */
+ int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
+ float objectness = math::MathUtils::SigmoidF32(
+ (static_cast<float>(net.branches[i].modelOutput[bbox_obj_offset])
+ - net.branches[i].zeroPoint
+ ) * net.branches[i].scale);
+
+ if(objectness > threshold) {
+ image::Detection det;
+ det.objectness = objectness;
+ /* Get bbox prediction data for each anchor, each feature point */
+ int bbox_x_offset = bbox_obj_offset -4;
+ int bbox_y_offset = bbox_x_offset + 1;
+ int bbox_w_offset = bbox_x_offset + 2;
+ int bbox_h_offset = bbox_x_offset + 3;
+ int bbox_scores_offset = bbox_x_offset + 5;
+
+ det.bbox.x = (static_cast<float>(net.branches[i].modelOutput[bbox_x_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.y = (static_cast<float>(net.branches[i].modelOutput[bbox_y_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.w = (static_cast<float>(net.branches[i].modelOutput[bbox_w_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+ det.bbox.h = (static_cast<float>(net.branches[i].modelOutput[bbox_h_offset])
+ - net.branches[i].zeroPoint) * net.branches[i].scale;
+
+ float bbox_x, bbox_y;
+
+ /* Eliminate grid sensitivity trick involved in YOLOv4 */
+ bbox_x = math::MathUtils::SigmoidF32(det.bbox.x);
+ bbox_y = math::MathUtils::SigmoidF32(det.bbox.y);
+ det.bbox.x = (bbox_x + w) / width;
+ det.bbox.y = (bbox_y + h) / height;
+
+ det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
+ det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;
+
+ for (int s = 0; s < numClasses; s++) {
+ float sig = math::MathUtils::SigmoidF32(
+ (static_cast<float>(net.branches[i].modelOutput[bbox_scores_offset + s]) -
+ net.branches[i].zeroPoint) * net.branches[i].scale
+ ) * objectness;
+ det.prob.emplace_back((sig > threshold) ? sig : 0);
+ }
+
+ /* Correct_YOLO_boxes */
+ det.bbox.x *= imageWidth;
+ det.bbox.w *= imageWidth;
+ det.bbox.y *= imageHeight;
+ det.bbox.h *= imageHeight;
+
+ if (num < net.topN || net.topN <=0) {
+ detections.emplace_front(det);
+ num += 1;
+ } else if (num == net.topN) {
+ detections.sort(det_objectness_comparator);
+ InsertTopNDetections(detections,det);
+ num += 1;
+ } else {
+ InsertTopNDetections(detections,det);
+ }
+ }
+ }
+ }
+ }
+ }
+ if(num > net.topN)
+ num -=1;
+}
+
+} /* namespace app */
+} /* namespace arm */