/*
 * Copyright (c) 2022 Arm Limited. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "DetectorPostProcessing.hpp"

#include <stdint.h>

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <forward_list>
#include <iterator>
#include <vector>

/* Box described by its absolute edges. Not referenced elsewhere in this file. */
typedef struct boxabs {
    float left, right, top, bot;
} boxabs;

/* One output head of the detector. */
typedef struct branch {
    int resolution;    /* Output grid is resolution x resolution cells. */
    int num_box;       /* Number of anchor boxes predicted per grid cell. */
    float *anchor;     /* num_box (width, height) pairs, read as anchor[2*a], anchor[2*a + 1]. */
    int8_t *tf_output; /* Quantised tensor data laid out as [h][w][anchor][x, y, w, h, obj, classes...]. */
    float scale;       /* Dequantisation: real = (q - zero_point) * scale. */
    int zero_point;
    size_t size;       /* Size of tf_output in bytes (one byte per int8 element). */
    float scale_x_y;   /* Only referenced by commented-out code in get_network_boxes. */
} branch;

/* Description of the whole detector: model input size plus all output heads. */
typedef struct network {
    int input_w;
    int input_h;
    int num_classes;
    int num_branch;
    branch *branchs;
    int topN;          /* Keep at most topN detections; a value <= 0 keeps every detection. */
} network;

/* Box described by its centre (x, y) and its extent (w, h). */
typedef struct box {
    float x, y, w, h;
} box;

/* A candidate detection. `prob` is a calloc'ed array with one entry per class. */
typedef struct detection {
    box bbox;
    float *prob;
    float objectness;
} detection;

/**
 * @brief Releases the per-class probability array owned by every detection.
 * @param dets  List whose elements' `prob` buffers are freed; the list itself is left intact.
 */
static void free_dets(std::forward_list<detection> &dets)
{
    for (detection &det : dets) {
        free(det.prob);
        det.prob = nullptr; /* Guard against an accidental second free. */
    }
}

/**
 * @brief Logistic function 1 / (1 + e^-x), evaluated in single precision.
 */
float sigmoid(float x)
{
    return 1.f / (1.f + std::exp(-x));
}

/**
 * @brief Strict-weak ordering placing detections in ascending objectness order.
 */
static bool det_objectness_comparator(const detection &pa, const detection &pb)
{
    return pa.objectness < pb.objectness;
}

/**
 * @brief Offers a detection to a full top-N list sorted by ascending objectness.
 *
 * If `det` does not rank above the current front (weakest) element it is
 * discarded and its `prob` buffer is freed. Otherwise it is inserted at its
 * sorted position and the front element is evicted, so the list length is
 * unchanged.
 *
 * @param dets  List sorted by ascending objectness.
 * @param det   Candidate; ownership of det.prob passes to this function.
 */
static void insert_topN_det(std::forward_list<detection> &dets, detection det)
{
    /* Walk to the last element whose objectness is not greater than the candidate's. */
    auto prev = dets.before_begin();
    for (auto it = dets.begin(); it != dets.end() && !(it->objectness > det.objectness); ++it) {
        prev = it;
    }

    if (prev == dets.before_begin()) {
        /* Candidate is weaker than everything kept so far (or the list is empty). */
        free(det.prob);
        return;
    }

    dets.emplace_after(prev, det);
    free(dets.front().prob);
    dets.pop_front();
}

/**
 * @brief Decodes every output head into detections whose objectness exceeds `thresh`.
 *
 * Box centres and sizes are scaled to `image_w` x `image_h` pixels. Class
 * probabilities are multiplied by objectness and zeroed when not above `thresh`.
 * When net->topN > 0 only the topN detections with the highest objectness are kept.
 *
 * @param net      Network description (heads, anchors, class count, topN).
 * @param image_w  Width the resulting boxes are scaled to.
 * @param image_h  Height the resulting boxes are scaled to.
 * @param thresh   Objectness / class probability threshold.
 * @param num      [out] Number of detections in the returned list.
 * @return List of detections; each element owns a calloc'ed `prob` array
 *         (release with free_dets).
 */
static std::forward_list<detection> get_network_boxes(network *net, int image_w, int image_h, float thresh, int *num)
{
    std::forward_list<detection> dets;
    const int num_classes = net->num_classes;
    *num = 0;

    for (int i = 0; i < net->num_branch; ++i) {
        const branch &br = net->branchs[i];
        const int height = br.resolution;
        const int width = br.resolution;
        const int channel = br.num_box * (5 + num_classes);

        /* Skip heads whose tensor cannot hold the expected grid rather than reading out of bounds. */
        if (br.tf_output == nullptr || height <= 0 || channel <= 0) {
            continue;
        }
        const size_t required = static_cast<size_t>(height) * static_cast<size_t>(width) * static_cast<size_t>(channel);
        if (required > br.size) {
            continue;
        }

        /* Converts one quantised tensor element to its real value. */
        auto dequantize = [&br](int offset) -> float {
            return (static_cast<float>(br.tf_output[offset]) - br.zero_point) * br.scale;
        };

        for (int h = 0; h < height; h++) {
            for (int w = 0; w < width; w++) {
                for (int anc = 0; anc < br.num_box; anc++) {
                    /* Per-anchor record: x, y, w, h, objectness, then the class scores. */
                    const int bbox_x_offset = h * width * channel + w * channel + anc * (num_classes + 5);
                    const int bbox_y_offset = bbox_x_offset + 1;
                    const int bbox_w_offset = bbox_x_offset + 2;
                    const int bbox_h_offset = bbox_x_offset + 3;
                    const int bbox_obj_offset = bbox_x_offset + 4;
                    const int bbox_scores_offset = bbox_x_offset + 5;

                    /* objectness score */
                    const float objectness = sigmoid(dequantize(bbox_obj_offset));
                    if (!(objectness > thresh)) {
                        continue;
                    }

                    detection det;
                    det.prob = static_cast<float *>(calloc(num_classes, sizeof(float)));
                    if (det.prob == nullptr && num_classes > 0) {
                        /* Out of memory: drop this candidate instead of dereferencing NULL. */
                        continue;
                    }
                    det.objectness = objectness;

                    /* Centre is the sigmoid offset inside the cell, normalised by the grid size.
                     * The YOLOv4 grid-sensitivity (scale_x_y) correction is intentionally not applied. */
                    det.bbox.x = (sigmoid(dequantize(bbox_x_offset)) + w) / width;
                    det.bbox.y = (sigmoid(dequantize(bbox_y_offset)) + h) / height;

                    /* Size is the anchor scaled by exp(prediction), normalised by the model input size. */
                    det.bbox.w = std::exp(dequantize(bbox_w_offset)) * br.anchor[anc * 2] / net->input_w;
                    det.bbox.h = std::exp(dequantize(bbox_h_offset)) * br.anchor[anc * 2 + 1] / net->input_h;

                    for (int s = 0; s < num_classes; s++) {
                        const float p = sigmoid(dequantize(bbox_scores_offset + s)) * objectness;
                        det.prob[s] = (p > thresh) ? p : 0;
                    }

                    /* correct_yolo_boxes: scale the normalised box to the target image size. */
                    det.bbox.x *= image_w;
                    det.bbox.w *= image_w;
                    det.bbox.y *= image_h;
                    det.bbox.h *= image_h;

                    if (*num < net->topN || net->topN <= 0) {
                        dets.emplace_front(det);
                        *num += 1;
                    } else if (*num == net->topN) {
                        /* List just became full: sort once so the weakest detection sits at the front.
                         * *num is bumped past topN to mark that the list is already sorted. */
                        dets.sort(det_objectness_comparator);
                        insert_topN_det(dets, det);
                        *num += 1;
                    } else {
                        insert_topN_det(dets, det);
                    }
                }
            }
        }
    }

    /* Undo the "already sorted" marker. Only applies in top-N mode; when
     * topN <= 0 the counter is already the exact number of detections. */
    if (net->topN > 0 && *num > net->topN) {
        *num = net->topN;
    }
    return dets;
}

// init part

/**
 * @brief Builds a branch (output head) descriptor from its parameters.
 *
 * `scale_x_y` is not supplied by callers; the struct is value-initialised so
 * that field is zero rather than indeterminate.
 */
static branch create_brach(int resolution, int num_box, float *anchor, int8_t *tf_output, size_t size, float scale, int zero_point)
{
    branch b{};
    b.resolution = resolution;
    b.num_box = num_box;
    b.anchor = anchor;
    b.tf_output = tf_output;
    b.size = size;
    b.scale = scale;
    b.zero_point = zero_point;
    return b;
}

/**
 * @brief Builds a network descriptor from its parameters. `branchs` is borrowed, not copied.
 */
static network creat_network(int input_w, int input_h, int num_classes, int num_branch, branch *branchs, int topN)
{
    network net{};
    net.input_w = input_w;
    net.input_h = input_h;
    net.num_classes = num_classes;
    net.num_branch = num_branch;
    net.branchs = branchs;
    net.topN = topN;
    return net;
}

// NMS part

/**
 * @brief Signed overlap length of two 1-D segments given by centre and width.
 * @return Length of the intersection; negative when the segments are disjoint.
 */
static float Calc1DOverlap(float x1_center, float width1, float x2_center, float width2)
{
    const float left_1 = x1_center - width1 / 2;
    const float left_2 = x2_center - width2 / 2;
    const float leftest = (left_1 > left_2) ? left_1 : left_2;

    const float right_1 = x1_center + width1 / 2;
    const float right_2 = x2_center + width2 / 2;
    const float rightest = (right_1 < right_2) ? right_1 : right_2;

    return rightest - leftest;
}

/**
 * @brief Area of the intersection of two centre/size boxes (0 when disjoint).
 */
static float CalcBoxIntersect(box box1, box box2)
{
    const float width = Calc1DOverlap(box1.x, box1.w, box2.x, box2.w);
    if (width < 0) {
        return 0;
    }
    const float height = Calc1DOverlap(box1.y, box1.h, box2.y, box2.h);
    if (height < 0) {
        return 0;
    }
    return width * height;
}

/**
 * @brief Area of the union of two boxes: sum of both areas minus their intersection.
 */
static float CalcBoxUnion(box box1, box box2)
{
    const float boxes_intersection = CalcBoxIntersect(box1, box2);
    return box1.w * box1.h + box2.w * box2.h - boxes_intersection;
}

/**
 * @brief Intersection-over-union of two boxes; 0 when either area term is 0.
 */
static float CalcBoxIOU(box box1, box box2)
{
    const float boxes_intersection = CalcBoxIntersect(box1, box2);
    if (boxes_intersection == 0) {
        return 0;
    }
    const float boxes_union = CalcBoxUnion(box1, box2);
    if (boxes_union == 0) {
        return 0;
    }
    return boxes_intersection / boxes_union;
}

/**
 * @brief Per-class non-maximum suppression.
 *
 * For each class the list is sorted by descending class probability; every
 * detection that overlaps a stronger, still-active detection by more than
 * `iou_threshold` has its probability for that class set to 0.
 *
 * @param detections     Detections to filter in place (list order is changed).
 * @param classes        Number of entries in each detection's `prob` array.
 * @param iou_threshold  Overlap above which the weaker detection is suppressed.
 */
static void CalcNMS(std::forward_list<detection> &detections, int classes, float iou_threshold)
{
    for (int k = 0; k < classes; ++k) {
        /* The class index is captured by the comparator; no file-scope state is needed. */
        detections.sort([k](const detection &a, const detection &b) {
            return a.prob[k] > b.prob[k];
        });

        for (auto it = detections.begin(); it != detections.end(); ++it) {
            if (it->prob[k] == 0) {
                continue;
            }
            for (auto itc = std::next(it); itc != detections.end(); ++itc) {
                if (itc->prob[k] == 0) {
                    continue;
                }
                if (CalcBoxIOU(it->bbox, itc->bbox) > iou_threshold) {
                    itc->prob[k] = 0;
                }
            }
        }
    }
}

/**
 * @brief Clamps a buffer offset into [0, im_w * im_h * FORMAT_MULTIPLY_FACTOR - 1].
 * @param offset  [in,out] Offset to clamp; a null pointer is ignored.
 */
static inline void check_and_fix_offset(int im_w, int im_h, int *offset)
{
    if (!offset) {
        return;
    }

    const int limit = im_w * im_h * FORMAT_MULTIPLY_FACTOR;
    if (*offset >= limit) {
        *offset = limit - 1;
    } else if (*offset < 0) {
        *offset = 0;
    }
}

/**
 * @brief Draws a two-pixel-thick rectangle outline by writing 0xFF into the image.
 *
 * Only the first byte of each pixel (offset * FORMAT_MULTIPLY_FACTOR) is written.
 * Offsets falling outside the buffer are clamped to its first or last byte.
 *
 * @param img_in  Image buffer of im_w * im_h * FORMAT_MULTIPLY_FACTOR bytes; ignored when null.
 * @param im_w    Image width in pixels.
 * @param im_h    Image height in pixels.
 * @param bx      Left edge of the box.
 * @param by      Top edge of the box.
 * @param bw      Box width.
 * @param bh      Box height.
 */
static void DrawBoxOnImage(uint8_t *img_in, int im_w, int im_h, int bx, int by, int bw, int bh)
{
    if (!img_in) {
        return;
    }

    int offset = 0;

    /* Horizontal edges. */
    for (int i = 0; i < bw; i++) {
        /* draw two lines */
        for (int line = 0; line < 2; line++) {
            /* top */
            offset = (i + (by + line) * im_w + bx) * FORMAT_MULTIPLY_FACTOR;
            check_and_fix_offset(im_w, im_h, &offset);
            img_in[offset] = 0xFF; /* FORMAT_MULTIPLY_FACTOR for rgb or grayscale */

            /* bottom */
            offset = (i + (by + bh - line) * im_w + bx) * FORMAT_MULTIPLY_FACTOR;
            check_and_fix_offset(im_w, im_h, &offset);
            img_in[offset] = 0xFF;
        }
    }

    /* Vertical edges. */
    for (int i = 0; i < bh; i++) {
        /* draw two lines */
        for (int line = 0; line < 2; line++) {
            /* left */
            offset = ((i + by) * im_w + bx + line) * FORMAT_MULTIPLY_FACTOR;
            check_and_fix_offset(im_w, im_h, &offset);
            img_in[offset] = 0xFF;

            /* right */
            offset = ((i + by) * im_w + bx + bw - line) * FORMAT_MULTIPLY_FACTOR;
            check_and_fix_offset(im_w, im_h, &offset);
            img_in[offset] = 0xFF;
        }
    }
}

/**
 * @brief Decodes the two detector outputs, applies NMS, reports and draws the surviving boxes.
 *
 * Returns without producing results when an output tensor, its data pointer or
 * its affine quantisation parameters are missing.
 *
 * @param img_in        Image the boxes are drawn on (ORIGINAL_IMAGE_WIDTH x ORIGINAL_IMAGE_HEIGHT);
 *                      drawing is skipped when null.
 * @param model_output  The two int8 output tensors; index 0 is decoded on an
 *                      INPUT_IMAGE_WIDTH/32 grid, index 1 on an INPUT_IMAGE_WIDTH/16 grid.
 * @param results_out   Detections are appended to this vector.
 */
void arm::app::RunPostProcessing(uint8_t *img_in, TfLiteTensor* model_output[2], std::vector<arm::app::DetectionResult> & results_out)
{
    /* init postprocessing */
    constexpr int num_classes = 1;
    constexpr int num_branch = 2;
    constexpr int topN = 0;

    const int input_w = INPUT_IMAGE_WIDTH;
    const int input_h = INPUT_IMAGE_HEIGHT;

    if (model_output == nullptr) {
        return;
    }

    /* Validate everything that is dereferenced below before touching it. */
    const TfLiteAffineQuantization *quant[num_branch] = {nullptr, nullptr};
    for (int i = 0; i < num_branch; ++i) {
        const TfLiteTensor *tensor = model_output[i];
        if (tensor == nullptr || tensor->data.int8 == nullptr) {
            return;
        }
        quant[i] = static_cast<const TfLiteAffineQuantization *>(tensor->quantization.params);
        if (quant[i] == nullptr ||
            quant[i]->scale == nullptr || quant[i]->scale->size < 1 ||
            quant[i]->zero_point == nullptr || quant[i]->zero_point->size < 1) {
            return;
        }
    }

    /* NOTE: anchors are different for any given input model size, estimated during training phase */
    float anchor1[] = {38, 77, 47, 97, 61, 126};
    float anchor2[] = {14, 26, 19, 37, 28, 55};

    /* Fixed-size head table on the stack: nothing to allocate, nothing to leak. */
    branch branchs[num_branch];
    branchs[0] = create_brach(INPUT_IMAGE_WIDTH / 32, 3, anchor1,
                              model_output[0]->data.int8, model_output[0]->bytes,
                              quant[0]->scale->data[0], quant[0]->zero_point->data[0]);
    branchs[1] = create_brach(INPUT_IMAGE_WIDTH / 16, 3, anchor2,
                              model_output[1]->data.int8, model_output[1]->bytes,
                              quant[1]->scale->data[0], quant[1]->zero_point->data[0]);

    network net = creat_network(input_w, input_h, num_classes, num_branch, branchs, topN);
    /* end init */

    /* start postprocessing */
    int nboxes = 0;
    const float thresh = 0.5f; /* 50% */
    const float nms = 0.45f;
    const int orig_image_width = ORIGINAL_IMAGE_WIDTH;
    const int orig_image_height = ORIGINAL_IMAGE_HEIGHT;

    std::forward_list<detection> dets = get_network_boxes(&net, orig_image_width, orig_image_height, thresh, &nboxes);

    /* do nms */
    CalcNMS(dets, net.num_classes, nms);

    for (const detection &det : dets) {
        /* Convert centre/size to corners and clip them to the image. */
        float xmin = det.bbox.x - det.bbox.w / 2.0f;
        float xmax = det.bbox.x + det.bbox.w / 2.0f;
        float ymin = det.bbox.y - det.bbox.h / 2.0f;
        float ymax = det.bbox.y + det.bbox.h / 2.0f;

        if (xmin < 0) {
            xmin = 0;
        }
        if (ymin < 0) {
            ymin = 0;
        }
        if (xmax > orig_image_width) {
            xmax = orig_image_width;
        }
        if (ymax > orig_image_height) {
            ymax = orig_image_height;
        }

        const float bx = xmin;
        const float by = ymin;
        const float bw = xmax - xmin;
        const float bh = ymax - ymin;

        for (int j = 0; j < net.num_classes; ++j) {
            /* A zero probability means below threshold or suppressed by NMS. */
            if (det.prob[j] > 0) {
                arm::app::DetectionResult tmp_result = {};

                tmp_result.m_normalisedVal = det.prob[j];
                tmp_result.m_x0 = bx;
                tmp_result.m_y0 = by;
                tmp_result.m_w = bw;
                tmp_result.m_h = bh;

                results_out.push_back(tmp_result);

                DrawBoxOnImage(img_in, orig_image_width, orig_image_height,
                               static_cast<int>(bx), static_cast<int>(by),
                               static_cast<int>(bw), static_cast<int>(bh));
            }
        }
    }

    free_dets(dets);
}

/**
 * @brief Converts an interleaved 3-bytes-per-pixel RGB image to 8-bit grayscale.
 *
 * Each output pixel is 0.299 * R + 0.587 * G + 0.114 * B, truncated and clipped to UINT8_MAX.
 *
 * @param rgb   Source buffer of im_w * im_h * 3 bytes; nothing is done when null.
 * @param gray  Destination buffer of im_w * im_h bytes; nothing is done when null.
 * @param im_w  Image width in pixels.
 * @param im_h  Image height in pixels.
 */
void arm::app::RgbToGrayscale(const uint8_t *rgb, uint8_t *gray, int im_w, int im_h)
{
    if (rgb == nullptr || gray == nullptr) {
        return;
    }

    const float R = 0.299f;
    const float G = 0.587f;
    const float B = 0.114f;

    const int num_pixels = im_w * im_h;
    for (int i = 0; i < num_pixels; i++) {
        const uint32_t int_gray = static_cast<uint32_t>(rgb[i * 3 + 0] * R + rgb[i * 3 + 1] * G + rgb[i * 3 + 2] * B);

        /* clip if needed */
        if (int_gray <= UINT8_MAX) {
            gray[i] = static_cast<uint8_t>(int_gray);
        } else {
            gray[i] = UINT8_MAX;
        }
    }
}