/*
 * Copyright (c) 2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "DetectorPostProcessing.hpp"

#include <cmath>
#include <forward_list>

namespace arm {
namespace app {
namespace object_detection {

DetectorPostprocessing::DetectorPostprocessing(
    const float threshold,
    const float nms,
    int numClasses,
    int topN)
    :   m_threshold(threshold),
        m_nms(nms),
        m_numClasses(numClasses),
        m_topN(topN)
{}

void DetectorPostprocessing::RunPostProcessing(
    uint8_t* imgIn,
    uint32_t imgRows,
    uint32_t imgCols,
    TfLiteTensor* modelOutput0,
    TfLiteTensor* modelOutput1,
    std::vector<DetectionResult>& resultsOut)
{
    /* Init postprocessing. */
    Network net {
        .inputWidth = static_cast<int>(imgCols),
        .inputHeight = static_cast<int>(imgRows),
        .numClasses = m_numClasses,
        .branches = {
            Branch {
                .resolution = static_cast<int>(imgCols/32),
                .numBox = 3,
                .anchor = anchor1,
                .modelOutput = modelOutput0->data.int8,
                .scale = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->scale->data[0],
                .zeroPoint = ((TfLiteAffineQuantization*)(modelOutput0->quantization.params))->zero_point->data[0],
                .size = modelOutput0->bytes
            },
            Branch {
                .resolution = static_cast<int>(imgCols/16),
                .numBox = 3,
                .anchor = anchor2,
                .modelOutput = modelOutput1->data.int8,
                .scale = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->scale->data[0],
                .zeroPoint = ((TfLiteAffineQuantization*)(modelOutput1->quantization.params))->zero_point->data[0],
                .size = modelOutput1->bytes
            }
        },
        .topN = m_topN
    };
    /* End init. */

    /* Start postprocessing. */
    int originalImageWidth = originalImageSize;
    int originalImageHeight = originalImageSize;

    std::forward_list<Detection> detections;
    GetNetworkBoxes(net, originalImageWidth, originalImageHeight, m_threshold, detections);

    /* Do NMS. */
    CalculateNMS(detections, net.numClasses, m_nms);

    for (auto& it: detections) {
        float xMin = it.bbox.x - it.bbox.w / 2.0f;
        float xMax = it.bbox.x + it.bbox.w / 2.0f;
        float yMin = it.bbox.y - it.bbox.h / 2.0f;
        float yMax = it.bbox.y + it.bbox.h / 2.0f;

        /* Clamp the box to the image. */
        if (xMin < 0) {
            xMin = 0;
        }
        if (yMin < 0) {
            yMin = 0;
        }
        if (xMax > originalImageWidth) {
            xMax = originalImageWidth;
        }
        if (yMax > originalImageHeight) {
            yMax = originalImageHeight;
        }

        float boxX = xMin;
        float boxY = yMin;
        float boxWidth = xMax - xMin;
        float boxHeight = yMax - yMin;

        for (int j = 0; j < net.numClasses; ++j) {
            if (it.prob[j] > 0) {
                DetectionResult tmpResult = {};
                tmpResult.m_normalisedVal = it.prob[j];
                tmpResult.m_x0 = boxX;
                tmpResult.m_y0 = boxY;
                tmpResult.m_w = boxWidth;
                tmpResult.m_h = boxHeight;

                resultsOut.push_back(tmpResult);

                /* TODO: Instead of drawing on the image, return the boxes and draw them on the LCD. */
                DrawBoxOnImage(imgIn, originalImageWidth, originalImageHeight, boxX, boxY, boxWidth, boxHeight);
            }
        }
    }
}

float DetectorPostprocessing::Sigmoid(float x)
{
    return 1.f/(1.f + std::exp(-x));
}
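
/* Maintains the running top-N list built by GetNetworkBoxes: 'detections' is
 * expected to be sorted by ascending objectness. The new detection is inserted
 * in order and the weakest entry at the front is dropped; a detection weaker
 * than every stored one is discarded. */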
void DetectorPostprocessing::InsertTopNDetections(std::forward_list<Detection>& detections, Detection& det)
{
    std::forward_list<Detection>::iterator it;
    std::forward_list<Detection>::iterator last_it;
    for (it = detections.begin(); it != detections.end(); ++it) {
        if (it->objectness > det.objectness) {
            break;
        }
        last_it = it;
    }
    if (it != detections.begin()) {
        detections.emplace_after(last_it, det);
        detections.pop_front();
    }
}

void DetectorPostprocessing::GetNetworkBoxes(Network& net, int imageWidth, int imageHeight, float threshold, std::forward_list<Detection>& detections)
{
    int numClasses = net.numClasses;
    int num = 0;
    auto det_objectness_comparator = [](Detection& pa, Detection& pb) {
        return pa.objectness < pb.objectness;
    };
    for (size_t i = 0; i < net.branches.size(); ++i) {
        int height  = net.branches[i].resolution;
        int width   = net.branches[i].resolution;
        int channel = net.branches[i].numBox*(5+numClasses);

        for (int h = 0; h < net.branches[i].resolution; h++) {
            for (int w = 0; w < net.branches[i].resolution; w++) {
                for (int anc = 0; anc < net.branches[i].numBox; anc++) {

                    /* Objectness score. Each anchor stores [x, y, w, h, objectness, class scores...]. */
                    int bbox_obj_offset = h * width * channel + w * channel + anc * (numClasses + 5) + 4;
                    float objectness = Sigmoid(((float)net.branches[i].modelOutput[bbox_obj_offset] - net.branches[i].zeroPoint) * net.branches[i].scale);

                    if (objectness > threshold) {
                        Detection det;
                        det.objectness = objectness;

                        /* Get bbox prediction data for each anchor, each feature point. */
                        int bbox_x_offset = bbox_obj_offset - 4;
                        int bbox_y_offset = bbox_x_offset + 1;
                        int bbox_w_offset = bbox_x_offset + 2;
                        int bbox_h_offset = bbox_x_offset + 3;
                        int bbox_scores_offset = bbox_x_offset + 5;

                        det.bbox.x = ((float)net.branches[i].modelOutput[bbox_x_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
                        det.bbox.y = ((float)net.branches[i].modelOutput[bbox_y_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
                        det.bbox.w = ((float)net.branches[i].modelOutput[bbox_w_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;
                        det.bbox.h = ((float)net.branches[i].modelOutput[bbox_h_offset] - net.branches[i].zeroPoint) * net.branches[i].scale;

                        float bbox_x, bbox_y;

                        /* Eliminate grid sensitivity trick involved in YOLOv4. */
                        bbox_x = Sigmoid(det.bbox.x);
                        bbox_y = Sigmoid(det.bbox.y);
                        det.bbox.x = (bbox_x + w) / width;
                        det.bbox.y = (bbox_y + h) / height;

                        det.bbox.w = std::exp(det.bbox.w) * net.branches[i].anchor[anc*2] / net.inputWidth;
                        det.bbox.h = std::exp(det.bbox.h) * net.branches[i].anchor[anc*2+1] / net.inputHeight;

                        for (int s = 0; s < numClasses; s++) {
                            float sig = Sigmoid(((float)net.branches[i].modelOutput[bbox_scores_offset + s] - net.branches[i].zeroPoint) * net.branches[i].scale) * objectness;
                            det.prob.emplace_back((sig > threshold) ? sig : 0);
                        }

                        /* Correct_YOLO_boxes: scale to the original image size. */
                        det.bbox.x *= imageWidth;
                        det.bbox.w *= imageWidth;
                        det.bbox.y *= imageHeight;
                        det.bbox.h *= imageHeight;

                        if (num < net.topN || net.topN <= 0) {
                            detections.emplace_front(det);
                            num += 1;
                        } else if (num == net.topN) {
                            detections.sort(det_objectness_comparator);
                            InsertTopNDetections(detections, det);
                            num += 1;
                        } else {
                            InsertTopNDetections(detections, det);
                        }
                    }
                }
            }
        }
    }
    if (num > net.topN) {
        num -= 1;
    }
}
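
/* IoU helpers used by CalculateNMS: Calculate1DOverlap returns the overlap of
 * two centre/width intervals along one axis (negative if they are disjoint);
 * box intersection, union and IoU are built on top of it. */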
float DetectorPostprocessing::Calculate1DOverlap(float x1Center, float width1, float x2Center, float width2)
{
    float left_1 = x1Center - width1/2;
    float left_2 = x2Center - width2/2;
    float leftest = left_1 > left_2 ? left_1 : left_2;

    float right_1 = x1Center + width1/2;
    float right_2 = x2Center + width2/2;
    float rightest = right_1 < right_2 ? right_1 : right_2;

    return rightest - leftest;
}

float DetectorPostprocessing::CalculateBoxIntersect(Box& box1, Box& box2)
{
    float width = Calculate1DOverlap(box1.x, box1.w, box2.x, box2.w);
    if (width < 0) {
        return 0;
    }
    float height = Calculate1DOverlap(box1.y, box1.h, box2.y, box2.h);
    if (height < 0) {
        return 0;
    }

    float total_area = width * height;
    return total_area;
}

float DetectorPostprocessing::CalculateBoxUnion(Box& box1, Box& box2)
{
    float boxes_intersection = CalculateBoxIntersect(box1, box2);
    float boxes_union = box1.w * box1.h + box2.w * box2.h - boxes_intersection;
    return boxes_union;
}

float DetectorPostprocessing::CalculateBoxIOU(Box& box1, Box& box2)
{
    float boxes_intersection = CalculateBoxIntersect(box1, box2);
    if (boxes_intersection == 0) {
        return 0;
    }

    float boxes_union = CalculateBoxUnion(box1, box2);
    if (boxes_union == 0) {
        return 0;
    }

    return boxes_intersection / boxes_union;
}

void DetectorPostprocessing::CalculateNMS(std::forward_list<Detection>& detections, int classes, float iouThreshold)
{
    int idxClass{0};
    /* Capture idxClass by reference so the comparator uses the class currently being processed. */
    auto CompareProbs = [&idxClass](Detection& prob1, Detection& prob2) {
        return prob1.prob[idxClass] > prob2.prob[idxClass];
    };

    for (idxClass = 0; idxClass < classes; ++idxClass) {
        detections.sort(CompareProbs);

        for (std::forward_list<Detection>::iterator it = detections.begin(); it != detections.end(); ++it) {
            if (it->prob[idxClass] == 0) {
                continue;
            }
            for (std::forward_list<Detection>::iterator itc = std::next(it, 1); itc != detections.end(); ++itc) {
                if (itc->prob[idxClass] == 0) {
                    continue;
                }
                if (CalculateBoxIOU(it->bbox, itc->bbox) > iouThreshold) {
                    itc->prob[idxClass] = 0;
                }
            }
        }
    }
}

void DetectorPostprocessing::DrawBoxOnImage(uint8_t* imgIn, int imWidth, int imHeight, int boxX, int boxY, int boxWidth, int boxHeight)
{
    auto CheckAndFixOffset = [](int im_width, int im_height, int& offset) {
        if ((offset) >= im_width * im_height * channelsImageDisplayed) {
            offset = im_width * im_height * channelsImageDisplayed - 1;
        } else if ((offset) < 0) {
            offset = 0;
        }
    };

    /* Consistency checks. */
    if (!imgIn) {
        return;
    }

    int offset = 0;
    for (int i = 0; i < boxWidth; i++) {
        /* Draw two horizontal lines. */
        for (int line = 0; line < 2; line++) {
            /* Top. */
            offset = (i + (boxY + line)*imWidth + boxX) * channelsImageDisplayed; /* channelsImageDisplayed for rgb or grayscale. */
            CheckAndFixOffset(imWidth, imHeight, offset);
            imgIn[offset] = 0xFF;
            /* Bottom. */
            offset = (i + (boxY + boxHeight - line)*imWidth + boxX) * channelsImageDisplayed;
            CheckAndFixOffset(imWidth, imHeight, offset);
            imgIn[offset] = 0xFF;
        }
    }

    for (int i = 0; i < boxHeight; i++) {
        /* Draw two vertical lines. */
        for (int line = 0; line < 2; line++) {
            /* Left. */
            offset = ((i + boxY)*imWidth + boxX + line) * channelsImageDisplayed;
            CheckAndFixOffset(imWidth, imHeight, offset);
            imgIn[offset] = 0xFF;
            /* Right. */
            offset = ((i + boxY)*imWidth + boxX + boxWidth - line) * channelsImageDisplayed;
            CheckAndFixOffset(imWidth, imHeight, offset);
            imgIn[offset] = 0xFF;
        }
    }
}

} /* namespace object_detection */
} /* namespace app */
} /* namespace arm */