/* * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include #include namespace arm_compute { namespace { template std::vector SoftNMS(const ITensor *proposals, std::vector> &scores_in, std::vector inds, const BoxNMSLimitInfo &info, int class_id) { std::vector keep; const int proposals_width = proposals->info()->dimension(1); std::vector x1(proposals_width); std::vector y1(proposals_width); std::vector x2(proposals_width); std::vector y2(proposals_width); std::vector areas(proposals_width); for(int i = 0; i < proposals_width; ++i) { x1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4, i))); y1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i))); x2[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 2, i))); y2[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 3, i))); areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0); } // Note: Soft NMS scores have already been initialized with input scores while(!inds.empty()) { // Find proposal with max score among remaining proposals int max_pos = 0; for(unsigned int i = 1; i < inds.size(); ++i) { if(scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)]) { max_pos = i; } } int element = inds.at(max_pos); keep.push_back(element); std::swap(inds.at(0), inds.at(max_pos)); // Remove first element and compute IoU of the remaining boxes with identified max box inds.erase(inds.begin()); std::vector sorted_indices_temp; for(auto idx : inds) { const auto xx1 = std::max(x1[idx], x1[element]); const auto yy1 = std::max(y1[idx], y1[element]); const auto xx2 = std::min(x2[idx], x2[element]); const auto yy2 = std::min(y2[idx], y2[element]); const auto w = std::max((xx2 - xx1 + 1.f), 0.f); const auto h = std::max((yy2 - yy1 + 1.f), 0.f); const auto inter = w * h; const auto ovr = inter / (areas[element] + areas[idx] - inter); // Update scores based on computed IoU, overlap threshold and NMS method T weight; switch(info.soft_nms_method()) { case NMSType::LINEAR: weight = (ovr > info.nms()) ? (1.f - ovr) : 1.f; break; case NMSType::GAUSSIAN: // Gaussian weight = std::exp(-1.f * ovr * ovr / info.soft_nms_sigma()); break; case NMSType::ORIGINAL: // Original NMS weight = (ovr > info.nms()) ? 0.f : 1.f; break; default: ARM_COMPUTE_ERROR("Not supported"); } // Discard boxes with new scores below min threshold and update pending indices scores_in[class_id][idx] *= weight; if(scores_in[class_id][idx] >= info.soft_nms_min_score_thres()) { sorted_indices_temp.push_back(idx); } } inds = sorted_indices_temp; } return keep; } template std::vector NonMaximaSuppression(const ITensor *proposals, std::vector sorted_indices, const BoxNMSLimitInfo &info, int class_id) { std::vector keep; const int proposals_width = proposals->info()->dimension(1); std::vector x1(proposals_width); std::vector y1(proposals_width); std::vector x2(proposals_width); std::vector y2(proposals_width); std::vector areas(proposals_width); for(int i = 0; i < proposals_width; ++i) { x1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4, i))); y1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i))); x2[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 2, i))); y2[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 3, i))); areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0); } while(!sorted_indices.empty()) { int i = sorted_indices.at(0); keep.push_back(i); std::vector sorted_indices_temp = sorted_indices; std::vector new_indices; sorted_indices_temp.erase(sorted_indices_temp.begin()); for(unsigned int j = 0; j < sorted_indices_temp.size(); ++j) { const float xx1 = std::max(x1[sorted_indices_temp.at(j)], x1[i]); const float yy1 = std::max(y1[sorted_indices_temp.at(j)], y1[i]); const float xx2 = std::min(x2[sorted_indices_temp.at(j)], x2[i]); const float yy2 = std::min(y2[sorted_indices_temp.at(j)], y2[i]); const float w = std::max((xx2 - xx1 + 1.f), 0.f); const float h = std::max((yy2 - yy1 + 1.f), 0.f); const float inter = w * h; const float ovr = inter / (areas[i] + areas[sorted_indices_temp.at(j)] - inter); const float ctr_x = xx1 + (w / 2); const float ctr_y = yy1 + (h / 2); // If suppress_size is specified, filter the boxes based on their size and position const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && ctr_x < info.im_width() && ctr_y < info.im_height()); if(ovr <= info.nms() && keep_size) { new_indices.push_back(j); } } const unsigned int new_indices_size = new_indices.size(); std::vector new_sorted_indices(new_indices_size); for(unsigned int i = 0; i < new_indices_size; ++i) { new_sorted_indices[i] = sorted_indices[new_indices[i] + 1]; } sorted_indices = new_sorted_indices; } return keep; } } // namespace CPPBoxWithNonMaximaSuppressionLimitKernel::CPPBoxWithNonMaximaSuppressionLimitKernel() : _scores_in(nullptr), _boxes_in(nullptr), _batch_splits_in(nullptr), _scores_out(nullptr), _boxes_out(nullptr), _classes(nullptr), _batch_splits_out(nullptr), _keeps(nullptr), _keeps_size(nullptr), _info() { } bool CPPBoxWithNonMaximaSuppressionLimitKernel::is_parallelisable() const { return false; } template void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() { const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0); const int num_classes = _scores_in->info()->dimension(0); const int scores_count = _scores_in->info()->dimension(1); std::vector total_keep_per_batch(batch_size); std::vector> keeps(num_classes); int total_keep_count = 0; std::vector> in_scores(num_classes, std::vector(scores_count)); for(int i = 0; i < scores_count; ++i) { for(int j = 0; j < num_classes; ++j) { in_scores[j][i] = *reinterpret_cast(_scores_in->ptr_to_element(Coordinates(j, i))); } } int offset = 0; int cur_start_idx = 0; for(int b = 0; b < batch_size; ++b) { const int num_boxes = _batch_splits_in == nullptr ? 1 : static_cast(*reinterpret_cast(_batch_splits_in->ptr_to_element(Coordinates(b)))); // Skip first class if there is more than 1 except if the number of classes is 1. const int j_start = (num_classes == 1 ? 0 : 1); for(int j = j_start; j < num_classes; ++j) { std::vector cur_scores(scores_count); std::vector inds; for(int i = 0; i < scores_count; ++i) { const T score = in_scores[j][i]; cur_scores[i] = score; if(score > _info.score_thresh()) { inds.push_back(i); } } if(_info.soft_nms_enabled()) { keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j); } else { std::sort(inds.data(), inds.data() + inds.size(), [&cur_scores](int lhs, int rhs) { return cur_scores[lhs] > cur_scores[rhs]; }); keeps[j] = NonMaximaSuppression(_boxes_in, inds, _info, j); } total_keep_count += keeps[j].size(); } if(_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im()) { // merge all scores (represented by indices) together and sort auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]() { std::vector ret(total_keep_count); int ret_idx = 0; for(unsigned int i = 1; i < keeps.size(); ++i) { auto &cur_keep = keeps[i]; for(auto &ckv : cur_keep) { ret[ret_idx++] = in_scores[i][ckv]; } } std::sort(ret.data(), ret.data() + ret.size()); return ret; }; auto all_scores_sorted = get_all_scores_sorted(); const T image_thresh = all_scores_sorted[all_scores_sorted.size() - _info.detections_per_im()]; for(int j = 1; j < num_classes; ++j) { auto &cur_keep = keeps[j]; std::vector new_keeps_j; for(auto &k : cur_keep) { if(in_scores[j][k] >= image_thresh) { new_keeps_j.push_back(k); } } keeps[j] = new_keeps_j; } total_keep_count = _info.detections_per_im(); } total_keep_per_batch[b] = total_keep_count; // Write results int cur_out_idx = 0; for(int j = j_start; j < num_classes; ++j) { auto &cur_keep = keeps[j]; auto cur_out_scores = reinterpret_cast(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); auto cur_out_classes = reinterpret_cast(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); const int box_column = (cur_start_idx + cur_out_idx) * 4; for(unsigned int k = 0; k < cur_keep.size(); ++k) { cur_out_scores[k] = in_scores[j][cur_keep[k]]; cur_out_classes[k] = static_cast(j); auto cur_out_box_row0 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k))); auto cur_out_box_row1 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k))); auto cur_out_box_row2 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k))); auto cur_out_box_row3 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k))); *cur_out_box_row0 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k]))); *cur_out_box_row1 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k]))); *cur_out_box_row2 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k]))); *cur_out_box_row3 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k]))); } cur_out_idx += cur_keep.size(); } if(_keeps != nullptr) { cur_out_idx = 0; for(int j = 0; j < num_classes; ++j) { for(unsigned int i = 0; i < keeps[j].size(); ++i) { *reinterpret_cast(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast(keeps[j].at(i)); } *reinterpret_cast(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = keeps[j].size(); cur_out_idx += keeps[j].size(); } } offset += num_boxes; cur_start_idx += total_keep_count; } if(_batch_splits_out != nullptr) { for(int b = 0; b < batch_size; ++b) { *reinterpret_cast(_batch_splits_out->ptr_to_element(Coordinates(b))) = total_keep_per_batch[b]; } } } void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info) { ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, boxes_in, scores_out); const unsigned int num_classes = scores_in->info()->dimension(0); ARM_COMPUTE_UNUSED(num_classes); ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes"); ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows"); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1)); ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4); if(keeps != nullptr) { ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output"); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keeps_size, 1, DataType::U32); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0)); ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0)); } if(batch_splits_in != nullptr) { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_in); } if(batch_splits_out != nullptr) { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_out); } _scores_in = scores_in; _boxes_in = boxes_in; _batch_splits_in = batch_splits_in; _scores_out = scores_out; _boxes_out = boxes_out; _classes = classes; _batch_splits_out = batch_splits_out; _keeps = keeps; _keeps_size = keeps_size; _info = info; // Configure kernel window Window win = calculate_max_window(*scores_in->info(), Steps(scores_in->info()->dimension(0))); IKernel::configure(win); } void CPPBoxWithNonMaximaSuppressionLimitKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); ARM_COMPUTE_UNUSED(window); ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window); switch(_scores_in->info()->data_type()) { case DataType::F32: run_nmslimit(); break; case DataType::F16: run_nmslimit(); break; default: ARM_COMPUTE_ERROR("Not supported"); } } } // namespace arm_compute