diff options
Diffstat (limited to 'src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp')
-rw-r--r-- | src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp | 176 |
1 files changed, 105 insertions, 71 deletions
diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp index fb1754247c..02686eb4f6 100644 --- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp +++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 Arm Limited. + * Copyright (c) 2018-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" #include "arm_compute/core/Helpers.h" + #include "src/core/helpers/WindowHelpers.h" #include <algorithm> @@ -34,7 +35,11 @@ namespace arm_compute namespace { template <typename T> -std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &scores_in, std::vector<int> inds, const BoxNMSLimitInfo &info, int class_id) +std::vector<int> SoftNMS(const ITensor *proposals, + std::vector<std::vector<T>> &scores_in, + std::vector<int> inds, + const BoxNMSLimitInfo &info, + int class_id) { std::vector<int> keep; const int proposals_width = proposals->info()->dimension(1); @@ -45,7 +50,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> & std::vector<T> y2(proposals_width); std::vector<T> areas(proposals_width); - for(int i = 0; i < proposals_width; ++i) + for (int i = 0; i < proposals_width; ++i) { x1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4, i))); y1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i))); @@ -56,13 +61,13 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> & // Note: Soft NMS scores have already been initialized with input scores - while(!inds.empty()) + while (!inds.empty()) { // Find proposal with max score among remaining proposals int max_pos = 0; - for(unsigned int i = 1; i < inds.size(); ++i) + for (unsigned int i = 1; i < inds.size(); ++i) { - if(scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)]) + if (scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)]) { max_pos = i; } @@ -75,7 +80,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> & inds.erase(inds.begin()); std::vector<int> sorted_indices_temp; - for(auto idx : inds) + for (auto idx : inds) { const auto xx1 = std::max(x1[idx], x1[element]); const auto yy1 = std::max(y1[idx], y1[element]); @@ -89,7 +94,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> & // Update scores based on computed IoU, overlap threshold and NMS method T weight; - switch(info.soft_nms_method()) + switch (info.soft_nms_method()) { case NMSType::LINEAR: weight = (ovr > info.nms()) ? (1.f - ovr) : 1.f; @@ -106,7 +111,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> & // Discard boxes with new scores below min threshold and update pending indices scores_in[class_id][idx] *= weight; - if(scores_in[class_id][idx] >= info.soft_nms_min_score_thres()) + if (scores_in[class_id][idx] >= info.soft_nms_min_score_thres()) { sorted_indices_temp.push_back(idx); } @@ -118,7 +123,10 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> & } template <typename T> -std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> sorted_indices, const BoxNMSLimitInfo &info, int class_id) +std::vector<int> NonMaximaSuppression(const ITensor *proposals, + std::vector<int> sorted_indices, + const BoxNMSLimitInfo &info, + int class_id) { std::vector<int> keep; @@ -130,7 +138,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> std::vector<T> y2(proposals_width); std::vector<T> areas(proposals_width); - for(int i = 0; i < proposals_width; ++i) + for (int i = 0; i < proposals_width; ++i) { x1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4, i))); y1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i))); @@ -139,7 +147,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0); } - while(!sorted_indices.empty()) + while (!sorted_indices.empty()) { int i = sorted_indices.at(0); keep.push_back(i); @@ -148,7 +156,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> std::vector<int> new_indices; sorted_indices_temp.erase(sorted_indices_temp.begin()); - for(unsigned int j = 0; j < sorted_indices_temp.size(); ++j) + for (unsigned int j = 0; j < sorted_indices_temp.size(); ++j) { const float xx1 = std::max(x1[sorted_indices_temp.at(j)], x1[i]); const float yy1 = std::max(y1[sorted_indices_temp.at(j)], y1[i]); @@ -163,8 +171,9 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> const float ctr_y = yy1 + (h / 2); // If suppress_size is specified, filter the boxes based on their size and position - const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && ctr_x < info.im_width() && ctr_y < info.im_height()); - if(ovr <= info.nms() && keep_size) + const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && + ctr_x < info.im_width() && ctr_y < info.im_height()); + if (ovr <= info.nms() && keep_size) { new_indices.push_back(j); } @@ -172,7 +181,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> const unsigned int new_indices_size = new_indices.size(); std::vector<int> new_sorted_indices(new_indices_size); - for(unsigned int i = 0; i < new_indices_size; ++i) + for (unsigned int i = 0; i < new_indices_size; ++i) { new_sorted_indices[i] = sorted_indices[new_indices[i] + 1]; } @@ -184,7 +193,15 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> } // namespace CPPBoxWithNonMaximaSuppressionLimitKernel::CPPBoxWithNonMaximaSuppressionLimitKernel() - : _scores_in(nullptr), _boxes_in(nullptr), _batch_splits_in(nullptr), _scores_out(nullptr), _boxes_out(nullptr), _classes(nullptr), _batch_splits_out(nullptr), _keeps(nullptr), _keeps_size(nullptr), + : _scores_in(nullptr), + _boxes_in(nullptr), + _batch_splits_in(nullptr), + _scores_out(nullptr), + _boxes_out(nullptr), + _classes(nullptr), + _batch_splits_out(nullptr), + _keeps(nullptr), + _keeps_size(nullptr), _info() { } @@ -197,7 +214,7 @@ bool CPPBoxWithNonMaximaSuppressionLimitKernel::is_parallelisable() const template <typename T> void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() { - const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0); + const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0); const int num_classes = _scores_in->info()->dimension(0); const int scores_count = _scores_in->info()->dimension(1); std::vector<int> total_keep_per_batch(batch_size); @@ -205,53 +222,48 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() int total_keep_count = 0; std::vector<std::vector<T>> in_scores(num_classes, std::vector<T>(scores_count)); - for(int i = 0; i < scores_count; ++i) + for (int i = 0; i < scores_count; ++i) { - for(int j = 0; j < num_classes; ++j) + for (int j = 0; j < num_classes; ++j) { in_scores[j][i] = *reinterpret_cast<const T *>(_scores_in->ptr_to_element(Coordinates(j, i))); } } - int offset = 0; int cur_start_idx = 0; - for(int b = 0; b < batch_size; ++b) + for (int b = 0; b < batch_size; ++b) { - const int num_boxes = _batch_splits_in == nullptr ? 1 : static_cast<int>(*reinterpret_cast<T *>(_batch_splits_in->ptr_to_element(Coordinates(b)))); // Skip first class if there is more than 1 except if the number of classes is 1. const int j_start = (num_classes == 1 ? 0 : 1); - for(int j = j_start; j < num_classes; ++j) + for (int j = j_start; j < num_classes; ++j) { std::vector<T> cur_scores(scores_count); std::vector<int> inds; - for(int i = 0; i < scores_count; ++i) + for (int i = 0; i < scores_count; ++i) { const T score = in_scores[j][i]; cur_scores[i] = score; - if(score > _info.score_thresh()) + if (score > _info.score_thresh()) { inds.push_back(i); } } - if(_info.soft_nms_enabled()) + if (_info.soft_nms_enabled()) { keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j); } else { std::sort(inds.data(), inds.data() + inds.size(), - [&cur_scores](int lhs, int rhs) - { - return cur_scores[lhs] > cur_scores[rhs]; - }); + [&cur_scores](int lhs, int rhs) { return cur_scores[lhs] > cur_scores[rhs]; }); keeps[j] = NonMaximaSuppression<T>(_boxes_in, inds, _info, j); } total_keep_count += keeps[j].size(); } - if(_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im()) + if (_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im()) { // merge all scores (represented by indices) together and sort auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]() @@ -259,10 +271,10 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() std::vector<T> ret(total_keep_count); int ret_idx = 0; - for(unsigned int i = 1; i < keeps.size(); ++i) + for (unsigned int i = 1; i < keeps.size(); ++i) { auto &cur_keep = keeps[i]; - for(auto &ckv : cur_keep) + for (auto &ckv : cur_keep) { ret[ret_idx++] = in_scores[i][ckv]; } @@ -275,13 +287,13 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() auto all_scores_sorted = get_all_scores_sorted(); const T image_thresh = all_scores_sorted[all_scores_sorted.size() - _info.detections_per_im()]; - for(int j = 1; j < num_classes; ++j) + for (int j = 1; j < num_classes; ++j) { auto &cur_keep = keeps[j]; std::vector<int> new_keeps_j; - for(auto &k : cur_keep) + for (auto &k : cur_keep) { - if(in_scores[j][k] >= image_thresh) + if (in_scores[j][k] >= image_thresh) { new_keeps_j.push_back(k); } @@ -295,59 +307,78 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() // Write results int cur_out_idx = 0; - for(int j = j_start; j < num_classes; ++j) + for (int j = j_start; j < num_classes; ++j) { - auto &cur_keep = keeps[j]; - auto cur_out_scores = reinterpret_cast<T *>(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); - auto cur_out_classes = reinterpret_cast<T *>(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); - const int box_column = (cur_start_idx + cur_out_idx) * 4; - - for(unsigned int k = 0; k < cur_keep.size(); ++k) + auto &cur_keep = keeps[j]; + auto cur_out_scores = + reinterpret_cast<T *>(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); + auto cur_out_classes = + reinterpret_cast<T *>(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); + const int box_column = (cur_start_idx + cur_out_idx) * 4; + + for (unsigned int k = 0; k < cur_keep.size(); ++k) { - cur_out_scores[k] = in_scores[j][cur_keep[k]]; - cur_out_classes[k] = static_cast<T>(j); - auto cur_out_box_row0 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k))); - auto cur_out_box_row1 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k))); - auto cur_out_box_row2 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k))); - auto cur_out_box_row3 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k))); - *cur_out_box_row0 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k]))); - *cur_out_box_row1 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k]))); - *cur_out_box_row2 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k]))); - *cur_out_box_row3 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k]))); + cur_out_scores[k] = in_scores[j][cur_keep[k]]; + cur_out_classes[k] = static_cast<T>(j); + auto cur_out_box_row0 = + reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k))); + auto cur_out_box_row1 = + reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k))); + auto cur_out_box_row2 = + reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k))); + auto cur_out_box_row3 = + reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k))); + *cur_out_box_row0 = + *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k]))); + *cur_out_box_row1 = + *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k]))); + *cur_out_box_row2 = + *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k]))); + *cur_out_box_row3 = + *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k]))); } cur_out_idx += cur_keep.size(); } - if(_keeps != nullptr) + if (_keeps != nullptr) { cur_out_idx = 0; - for(int j = 0; j < num_classes; ++j) + for (int j = 0; j < num_classes; ++j) { - for(unsigned int i = 0; i < keeps[j].size(); ++i) + for (unsigned int i = 0; i < keeps[j].size(); ++i) { - *reinterpret_cast<T *>(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast<T>(keeps[j].at(i)); + *reinterpret_cast<T *>(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = + static_cast<T>(keeps[j].at(i)); } - *reinterpret_cast<uint32_t *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = keeps[j].size(); + *reinterpret_cast<uint32_t *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = + keeps[j].size(); cur_out_idx += keeps[j].size(); } } - offset += num_boxes; cur_start_idx += total_keep_count; } - if(_batch_splits_out != nullptr) + if (_batch_splits_out != nullptr) { - for(int b = 0; b < batch_size; ++b) + for (int b = 0; b < batch_size; ++b) { *reinterpret_cast<float *>(_batch_splits_out->ptr_to_element(Coordinates(b))) = total_keep_per_batch[b]; } } } -void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, - ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info) +void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, + const ITensor *boxes_in, + const ITensor *batch_splits_in, + ITensor *scores_out, + ITensor *boxes_out, + ITensor *classes, + ITensor *batch_splits_out, + ITensor *keeps, + ITensor *keeps_size, + const BoxNMSLimitInfo info) { ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32); @@ -355,25 +386,28 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_ const unsigned int num_classes = scores_in->info()->dimension(0); ARM_COMPUTE_UNUSED(num_classes); - ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes"); - ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows"); + ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), + "First dimension of input boxes must be of size 4*num_classes"); + ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), + "Input scores and input boxes must have the same number of rows"); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1)); ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != classes->info()->dimension(0)); - if(keeps != nullptr) + if (keeps != nullptr) { - ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output"); + ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, + "keeps_size cannot be nullptr if keeps has to be provided as output"); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keeps_size, 1, DataType::U32); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0)); ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0)); } - if(batch_splits_in != nullptr) + if (batch_splits_in != nullptr) { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_in); } - if(batch_splits_out != nullptr) + if (batch_splits_out != nullptr) { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_out); } @@ -402,7 +436,7 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run(const Window &window, const ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window); - switch(_scores_in->info()->data_type()) + switch (_scores_in->info()->data_type()) { case DataType::F32: run_nmslimit<float>(); |