aboutsummaryrefslogtreecommitdiff
path: root/src/core/CPP/kernels
diff options
context:
space:
mode:
author: Michalis Spyrou <michalis.spyrou@arm.com> 2018-09-19 09:46:47 +0100
committer: Michalis Spyrou <michalis.spyrou@arm.com> 2018-11-06 11:10:26 +0000
commit: 2709d61ec25156387e48a75d19a5ab5a035fc12b (patch)
tree: 45ae48cb9e4ca929daef371c893d724a6bc8254e /src/core/CPP/kernels
parent: 3704464b68ddd22739b38354de1721a3db4267b5 (diff)
download: ComputeLibrary-2709d61ec25156387e48a75d19a5ab5a035fc12b.tar.gz
COMPMID-1328 Add support for BoxWithNMSLimit operator in CPP
Change-Id: I5aae537372bf797fbb2a2bae81038f8963b041a9
Diffstat (limited to 'src/core/CPP/kernels')
-rw-r--r-- src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp 409
1 files changed, 409 insertions, 0 deletions
diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
new file mode 100644
index 0000000000..89413fcca4
--- /dev/null
+++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
@@ -0,0 +1,409 @@
+/*
+ * Copyright (c) 2018 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+
+#include <algorithm>
+#include <cmath>
+
+namespace arm_compute
+{
+namespace
+{
+/** Soft non-maxima suppression over the boxes of a single class.
+ *
+ * Repeatedly picks the pending box with the highest score, records it, and rescales the score of every
+ * other pending box according to its IoU with the picked one. A box stays pending only while its rescaled
+ * score is at least info.soft_nms_min_score_thres().
+ *
+ * @param[in]     proposals Tensor of boxes; element (class_id * 4 + c, i) is read as coordinate c (x1, y1, x2, y2) of box i.
+ * @param[in,out] scores_in Scores indexed as [class][box]; the entries of @p class_id are rescaled in place.
+ * @param[in]     inds      Indices of the candidate boxes (taken by value and consumed locally).
+ * @param[in]     info      Supplies nms(), soft_nms_method(), soft_nms_sigma() and soft_nms_min_score_thres().
+ * @param[in]     class_id  Class whose boxes and scores are processed.
+ *
+ * @return Indices of the picked boxes, in picking order.
+ */
+template <typename T>
+std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &scores_in, std::vector<int> inds, const BoxNMSLimitInfo &info, int class_id)
+{
+    const int box_count   = proposals->info()->dimension(1);
+    const int coord_start = class_id * 4;
+
+    // Cache the four coordinates and the area of every box of this class
+    std::vector<T> box_x1(box_count);
+    std::vector<T> box_y1(box_count);
+    std::vector<T> box_x2(box_count);
+    std::vector<T> box_y2(box_count);
+    std::vector<T> box_area(box_count);
+
+    for(int b = 0; b < box_count; ++b)
+    {
+        box_x1[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start, b)));
+        box_y1[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start + 1, b)));
+        box_x2[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start + 2, b)));
+        box_y2[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start + 3, b)));
+        box_area[b] = (box_x2[b] - box_x1[b] + 1.0) * (box_y2[b] - box_y1[b] + 1.0);
+    }
+
+    // Note: Soft NMS scores have already been initialized with the input scores
+    std::vector<T> &class_scores = scores_in[class_id];
+
+    std::vector<int> picked_boxes;
+    while(!inds.empty())
+    {
+        // First pending entry holding the highest score
+        const auto best_it = std::max_element(inds.begin(), inds.end(), [&class_scores](int lhs, int rhs)
+        {
+            return class_scores[lhs] < class_scores[rhs];
+        });
+        const int picked = *best_it;
+        picked_boxes.push_back(picked);
+
+        // Bring the picked entry to the front and drop it from the pending list
+        std::iter_swap(inds.begin(), best_it);
+        inds.erase(inds.begin());
+
+        // Rescale the remaining boxes against the picked one
+        std::vector<int> still_pending;
+        for(const int other : inds)
+        {
+            const auto ix1 = std::max(box_x1[other], box_x1[picked]);
+            const auto iy1 = std::max(box_y1[other], box_y1[picked]);
+            const auto ix2 = std::min(box_x2[other], box_x2[picked]);
+            const auto iy2 = std::min(box_y2[other], box_y2[picked]);
+
+            const auto iw      = std::max((ix2 - ix1 + 1.f), 0.f);
+            const auto ih      = std::max((iy2 - iy1 + 1.f), 0.f);
+            const auto overlap = iw * ih;
+            const auto iou     = overlap / (box_area[picked] + box_area[other] - overlap);
+
+            // Weight depends on the IoU, the overlap threshold and the selected NMS method
+            T weight;
+            switch(info.soft_nms_method())
+            {
+                case NMSType::LINEAR:
+                    if(iou > info.nms())
+                    {
+                        weight = 1.f - iou;
+                    }
+                    else
+                    {
+                        weight = 1.f;
+                    }
+                    break;
+                case NMSType::GAUSSIAN:
+                    weight = std::exp(-1.f * iou * iou / info.soft_nms_sigma());
+                    break;
+                case NMSType::ORIGINAL:
+                    if(iou > info.nms())
+                    {
+                        weight = 0.f;
+                    }
+                    else
+                    {
+                        weight = 1.f;
+                    }
+                    break;
+                default:
+                    ARM_COMPUTE_ERROR("Not supported");
+            }
+
+            // Keep the box pending only if its rescaled score still reaches the minimum threshold
+            class_scores[other] *= weight;
+            if(class_scores[other] >= info.soft_nms_min_score_thres())
+            {
+                still_pending.push_back(other);
+            }
+        }
+        inds.swap(still_pending);
+    }
+
+    return picked_boxes;
+}
+
+/** Greedy non-maxima suppression over the boxes of a single class.
+ *
+ * Walks @p sorted_indices front to back: the first pending box is always selected, then every later box whose
+ * IoU with it exceeds info.nms() is discarded. The relative order of the surviving boxes is preserved.
+ *
+ * @param[in] proposals      Tensor of boxes; element (class_id * 4 + c, i) is read as coordinate c (x1, y1, x2, y2) of box i.
+ * @param[in] sorted_indices Candidate box indices in the order they must be considered (taken by value).
+ * @param[in] info           Supplies the overlap threshold nms().
+ * @param[in] class_id       Class whose boxes are processed.
+ *
+ * @return Indices of the selected boxes, in selection order.
+ */
+template <typename T>
+std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> sorted_indices, const BoxNMSLimitInfo &info, int class_id)
+{
+    const int box_count   = proposals->info()->dimension(1);
+    const int coord_start = class_id * 4;
+
+    // Cache the four coordinates and the area of every box of this class
+    std::vector<T> box_x1(box_count);
+    std::vector<T> box_y1(box_count);
+    std::vector<T> box_x2(box_count);
+    std::vector<T> box_y2(box_count);
+    std::vector<T> box_area(box_count);
+
+    for(int b = 0; b < box_count; ++b)
+    {
+        box_x1[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start, b)));
+        box_y1[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start + 1, b)));
+        box_x2[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start + 2, b)));
+        box_y2[b]   = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(coord_start + 3, b)));
+        box_area[b] = (box_x2[b] - box_x1[b] + 1.0) * (box_y2[b] - box_y1[b] + 1.0);
+    }
+
+    std::vector<int> selected;
+    while(!sorted_indices.empty())
+    {
+        // The head of the pending list is always selected
+        const int head = sorted_indices.front();
+        selected.push_back(head);
+
+        // Keep, in order, the later boxes that do not overlap the head by more than the threshold
+        std::vector<int> survivors;
+        for(auto it = sorted_indices.begin() + 1; it != sorted_indices.end(); ++it)
+        {
+            const int  cand = *it;
+            const auto ix1  = std::max(box_x1[cand], box_x1[head]);
+            const auto iy1  = std::max(box_y1[cand], box_y1[head]);
+            const auto ix2  = std::min(box_x2[cand], box_x2[head]);
+            const auto iy2  = std::min(box_y2[cand], box_y2[head]);
+
+            const auto iw      = std::max((ix2 - ix1 + 1.f), 0.f);
+            const auto ih      = std::max((iy2 - iy1 + 1.f), 0.f);
+            const auto overlap = iw * ih;
+            const auto iou     = overlap / (box_area[head] + box_area[cand] - overlap);
+
+            if(iou <= info.nms())
+            {
+                survivors.push_back(cand);
+            }
+        }
+        sorted_indices.swap(survivors);
+    }
+
+    return selected;
+}
+} // namespace
+
+/** Default constructor: every tensor pointer starts as nullptr and the NMS info is value-initialized. */
+CPPBoxWithNonMaximaSuppressionLimitKernel::CPPBoxWithNonMaximaSuppressionLimitKernel()
+    : _scores_in(nullptr),
+      _boxes_in(nullptr),
+      _batch_splits_in(nullptr),
+      _scores_out(nullptr),
+      _boxes_out(nullptr),
+      _classes(nullptr),
+      _batch_splits_out(nullptr),
+      _keeps(nullptr),
+      _keeps_size(nullptr),
+      _info()
+{
+}
+
+/** Always returns false.
+ *
+ * NOTE(review): presumably this tells the scheduler not to split the kernel's window across threads; confirm
+ * against the base-class contract.
+ */
+bool CPPBoxWithNonMaximaSuppressionLimitKernel::is_parallelisable() const
+{
+    const bool can_run_in_parallel = false;
+    return can_run_in_parallel;
+}
+
+/** Selects boxes per class (NMS or Soft-NMS), optionally caps the number of detections and writes the outputs.
+ *
+ * T is the element type used to read and write every tensor; run() instantiates it with float for F32 and
+ * half for F16 according to the data type of the input scores.
+ *
+ * Steps performed for every batch entry:
+ *  - for each class except class 0, collect the boxes whose score exceeds score_thresh() and run either
+ *    SoftNMS or score-sorted NonMaximaSuppression on them;
+ *  - if detections_per_im() is positive and more boxes were kept than that, drop the kept boxes whose score is
+ *    below the detections_per_im()-th highest kept score;
+ *  - write scores, class ids and box coordinates of the kept boxes, plus the optional keeps / keeps_size outputs.
+ * Finally the optional batch_splits_out tensor receives the kept count recorded for each batch entry.
+ *
+ * NOTE(review): every batch iteration scans all rows of the scores tensor (the values stored in batch_splits_in
+ * are not used to slice the input, only its length sets the batch count) and total_keep_count is not reset
+ * between iterations, so results for more than one batch entry should be confirmed against the intended behaviour.
+ */
+template <typename T>
+void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
+{
+    const int batch_size   = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0);
+    const int num_classes  = _scores_in->info()->dimension(0);
+    const int scores_count = _scores_in->info()->dimension(1);
+
+    std::vector<int>              total_keep_per_batch(batch_size);
+    std::vector<std::vector<int>> keeps(num_classes);
+    int                           total_keep_count = 0;
+
+    // Local copy of the scores laid out as [class][box]; Soft-NMS rescales these values in place
+    std::vector<std::vector<T>> in_scores(num_classes, std::vector<T>(scores_count));
+    for(int i = 0; i < scores_count; ++i)
+    {
+        for(int j = 0; j < num_classes; ++j)
+        {
+            in_scores[j][i] = *reinterpret_cast<const T *>(_scores_in->ptr_to_element(Coordinates(j, i)));
+        }
+    }
+
+    int cur_start_idx = 0;
+    for(int b = 0; b < batch_size; ++b)
+    {
+        // Skip first class
+        for(int j = 1; j < num_classes; ++j)
+        {
+            // Snapshot the scores of this class and collect the boxes above the score threshold
+            std::vector<T>   cur_scores(scores_count);
+            std::vector<int> inds;
+            for(int i = 0; i < scores_count; ++i)
+            {
+                const T score = in_scores[j][i];
+                cur_scores[i] = score;
+
+                if(score > _info.score_thresh())
+                {
+                    inds.push_back(i);
+                }
+            }
+            if(_info.soft_nms_enabled())
+            {
+                keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j);
+            }
+            else
+            {
+                // Plain NMS expects the candidates ordered by decreasing score
+                std::sort(inds.data(), inds.data() + inds.size(),
+                          [&cur_scores](int lhs, int rhs)
+                {
+                    return cur_scores[lhs] > cur_scores[rhs];
+                });
+
+                keeps[j] = NonMaximaSuppression<T>(_boxes_in, inds, _info, j);
+            }
+            total_keep_count += keeps[j].size();
+        }
+
+        if(_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im())
+        {
+            // merge all scores (represented by indices) together and sort
+            auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]()
+            {
+                std::vector<T> ret(total_keep_count);
+
+                int ret_idx = 0;
+                for(unsigned int i = 1; i < keeps.size(); ++i)
+                {
+                    auto &cur_keep = keeps[i];
+                    for(auto &ckv : cur_keep)
+                    {
+                        ret[ret_idx++] = in_scores[i][ckv];
+                    }
+                }
+
+                std::sort(ret.data(), ret.data() + ret.size());
+
+                return ret;
+            };
+
+            // Score found detections_per_im() places from the top of the ascending list; lower scores are dropped
+            auto    all_scores_sorted = get_all_scores_sorted();
+            const T image_thresh      = all_scores_sorted[all_scores_sorted.size() - _info.detections_per_im()];
+            for(int j = 1; j < num_classes; ++j)
+            {
+                auto            &cur_keep = keeps[j];
+                std::vector<int> new_keeps_j;
+                for(auto &k : cur_keep)
+                {
+                    if(in_scores[j][k] >= image_thresh)
+                    {
+                        new_keeps_j.push_back(k);
+                    }
+                }
+                keeps[j] = new_keeps_j;
+            }
+            total_keep_count = _info.detections_per_im();
+        }
+
+        total_keep_per_batch[b] = total_keep_count;
+
+        // Write results
+        int cur_out_idx = 0;
+        for(int j = 1; j < num_classes; ++j)
+        {
+            auto     &cur_keep        = keeps[j];
+            auto      cur_out_scores  = reinterpret_cast<T *>(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
+            auto      cur_out_classes = reinterpret_cast<T *>(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
+            const int box_column      = (cur_start_idx + cur_out_idx) * 4;
+
+            for(unsigned int k = 0; k < cur_keep.size(); ++k)
+            {
+                cur_out_scores[k]     = in_scores[j][cur_keep[k]];
+                cur_out_classes[k]    = static_cast<T>(j);
+                auto cur_out_box_row0 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k)));
+                auto cur_out_box_row1 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k)));
+                auto cur_out_box_row2 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k)));
+                auto cur_out_box_row3 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k)));
+                *cur_out_box_row0     = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k])));
+                *cur_out_box_row1     = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k])));
+                *cur_out_box_row2     = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k])));
+                *cur_out_box_row3     = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k])));
+            }
+
+            cur_out_idx += cur_keep.size();
+        }
+
+        // Optional outputs: indices of the kept boxes and how many were kept per class
+        if(_keeps != nullptr)
+        {
+            cur_out_idx = 0;
+            for(int j = 0; j < num_classes; ++j)
+            {
+                for(unsigned int i = 0; i < keeps[j].size(); ++i)
+                {
+                    *reinterpret_cast<T *>(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast<T>(keeps[j].at(i));
+                }
+                *reinterpret_cast<T *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = static_cast<T>(keeps[j].size());
+                cur_out_idx += keeps[j].size();
+            }
+        }
+
+        cur_start_idx += total_keep_count;
+    }
+
+    if(_batch_splits_out != nullptr)
+    {
+        for(int b = 0; b < batch_size; ++b)
+        {
+            // configure() requires batch_splits_out to share the data type of scores_in, so store through T
+            // (storing through float would write 4-byte values into an F16 tensor)
+            *reinterpret_cast<T *>(_batch_splits_out->ptr_to_element(Coordinates(b))) = static_cast<T>(total_keep_per_batch[b]);
+        }
+    }
+}
+
+/** Validates the tensors, stores them together with the NMS parameters and configures the kernel window.
+ *
+ * @param[in]  scores_in        Input scores, F16 or F32; dimension 0 is taken as the number of classes.
+ * @param[in]  boxes_in         Input boxes; dimension 0 must be 4 * num_classes and dimension 1 must match @p scores_in.
+ * @param[in]  batch_splits_in  Optional (may be nullptr); must share the data type of @p scores_in.
+ * @param[out] scores_out       Output scores; dimension 0 must equal dimension 1 of @p boxes_out.
+ * @param[out] boxes_out        Output boxes; dimension 0 must be 4.
+ * @param[out] classes          Output class ids.
+ * @param[out] batch_splits_out Optional (may be nullptr); must share the data type of @p scores_in.
+ * @param[out] keeps            Optional (may be nullptr); same data type as @p scores_in and same dimension 0 as @p scores_out.
+ * @param[out] keeps_size       Required when @p keeps is given; same data type as @p scores_in, dimension 0 equal to num_classes.
+ * @param[in]  info             NMS parameters, copied into the kernel.
+ */
+void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
+ ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
+{
+ // Mandatory tensors and supported data types
+ ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32);
+ const unsigned int num_classes = scores_in->info()->dimension(0);
+
+ // NOTE(review): presumably keeps num_classes "used" when the checks below compile out - confirm
+ ARM_COMPUTE_UNUSED(num_classes);
+ // Shape consistency between scores and boxes, on both the input and the output side
+ ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes");
+ ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows");
+ ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1));
+ ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4);
+ // keeps is optional, but when present keeps_size is mandatory and both must match scores_in's data type
+ if(keeps != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output");
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps);
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps_size);
+ ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0));
+ ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0));
+ }
+ // The optional batch split tensors must use the same data type as the scores
+ if(batch_splits_in != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_in);
+ }
+ if(batch_splits_out != nullptr)
+ {
+ ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_out);
+ }
+
+ // Remember the tensors and parameters for run()
+ _scores_in = scores_in;
+ _boxes_in = boxes_in;
+ _batch_splits_in = batch_splits_in;
+ _scores_out = scores_out;
+ _boxes_out = boxes_out;
+ _classes = classes;
+ _batch_splits_out = batch_splits_out;
+ _keeps = keeps;
+ _keeps_size = keeps_size;
+ _info = info;
+
+ // Configure kernel window
+ // The step along the first dimension equals the full size of dimension 0 of scores_in
+ Window win = calculate_max_window(*scores_in->info(), Steps(scores_in->info()->dimension(0)));
+
+ IKernel::configure(win);
+}
+
+/** Executes the kernel by dispatching to run_nmslimit() for the data type of the input scores.
+ *
+ * @param[in] window Must match the window set in configure(); it is only used for that check.
+ * @param[in] info   Unused.
+ */
+void CPPBoxWithNonMaximaSuppressionLimitKernel::run(const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_UNUSED(window);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window);
+
+    // Pick the element type matching the scores tensor
+    const DataType scores_type = _scores_in->info()->data_type();
+    if(scores_type == DataType::F32)
+    {
+        run_nmslimit<float>();
+    }
+    else if(scores_type == DataType::F16)
+    {
+        run_nmslimit<half>();
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("Not supported");
+    }
+}
+} // namespace arm_compute