aboutsummaryrefslogtreecommitdiff
path: root/src/core/CPP/kernels
diff options
context:
space:
mode:
authorFelix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>2023-09-27 17:46:17 +0100
committerfelixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>2023-09-28 12:08:05 +0000
commitafd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree03bc7d5a762099989b16a656fa8d397b490ed70e /src/core/CPP/kernels
parentbdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
downloadComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang-format configuration file (not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, SConscript ...) and the following directories: - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow-up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/core/CPP/kernels')
-rw-r--r--src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp171
-rw-r--r--src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp110
-rw-r--r--src/core/CPP/kernels/CPPPermuteKernel.cpp45
-rw-r--r--src/core/CPP/kernels/CPPTopKVKernel.cpp43
-rw-r--r--src/core/CPP/kernels/CPPUpsampleKernel.cpp17
5 files changed, 232 insertions, 154 deletions
diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
index 0f405d8e83..02686eb4f6 100644
--- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
+++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
#include "arm_compute/core/Helpers.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include <algorithm>
@@ -34,7 +35,11 @@ namespace arm_compute
namespace
{
template <typename T>
-std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &scores_in, std::vector<int> inds, const BoxNMSLimitInfo &info, int class_id)
+std::vector<int> SoftNMS(const ITensor *proposals,
+ std::vector<std::vector<T>> &scores_in,
+ std::vector<int> inds,
+ const BoxNMSLimitInfo &info,
+ int class_id)
{
std::vector<int> keep;
const int proposals_width = proposals->info()->dimension(1);
@@ -45,7 +50,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &
std::vector<T> y2(proposals_width);
std::vector<T> areas(proposals_width);
- for(int i = 0; i < proposals_width; ++i)
+ for (int i = 0; i < proposals_width; ++i)
{
x1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4, i)));
y1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i)));
@@ -56,13 +61,13 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &
// Note: Soft NMS scores have already been initialized with input scores
- while(!inds.empty())
+ while (!inds.empty())
{
// Find proposal with max score among remaining proposals
int max_pos = 0;
- for(unsigned int i = 1; i < inds.size(); ++i)
+ for (unsigned int i = 1; i < inds.size(); ++i)
{
- if(scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)])
+ if (scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)])
{
max_pos = i;
}
@@ -75,7 +80,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &
inds.erase(inds.begin());
std::vector<int> sorted_indices_temp;
- for(auto idx : inds)
+ for (auto idx : inds)
{
const auto xx1 = std::max(x1[idx], x1[element]);
const auto yy1 = std::max(y1[idx], y1[element]);
@@ -89,7 +94,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &
// Update scores based on computed IoU, overlap threshold and NMS method
T weight;
- switch(info.soft_nms_method())
+ switch (info.soft_nms_method())
{
case NMSType::LINEAR:
weight = (ovr > info.nms()) ? (1.f - ovr) : 1.f;
@@ -106,7 +111,7 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &
// Discard boxes with new scores below min threshold and update pending indices
scores_in[class_id][idx] *= weight;
- if(scores_in[class_id][idx] >= info.soft_nms_min_score_thres())
+ if (scores_in[class_id][idx] >= info.soft_nms_min_score_thres())
{
sorted_indices_temp.push_back(idx);
}
@@ -118,7 +123,10 @@ std::vector<int> SoftNMS(const ITensor *proposals, std::vector<std::vector<T>> &
}
template <typename T>
-std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int> sorted_indices, const BoxNMSLimitInfo &info, int class_id)
+std::vector<int> NonMaximaSuppression(const ITensor *proposals,
+ std::vector<int> sorted_indices,
+ const BoxNMSLimitInfo &info,
+ int class_id)
{
std::vector<int> keep;
@@ -130,7 +138,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int>
std::vector<T> y2(proposals_width);
std::vector<T> areas(proposals_width);
- for(int i = 0; i < proposals_width; ++i)
+ for (int i = 0; i < proposals_width; ++i)
{
x1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4, i)));
y1[i] = *reinterpret_cast<T *>(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i)));
@@ -139,7 +147,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int>
areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0);
}
- while(!sorted_indices.empty())
+ while (!sorted_indices.empty())
{
int i = sorted_indices.at(0);
keep.push_back(i);
@@ -148,7 +156,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int>
std::vector<int> new_indices;
sorted_indices_temp.erase(sorted_indices_temp.begin());
- for(unsigned int j = 0; j < sorted_indices_temp.size(); ++j)
+ for (unsigned int j = 0; j < sorted_indices_temp.size(); ++j)
{
const float xx1 = std::max(x1[sorted_indices_temp.at(j)], x1[i]);
const float yy1 = std::max(y1[sorted_indices_temp.at(j)], y1[i]);
@@ -163,8 +171,9 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int>
const float ctr_y = yy1 + (h / 2);
// If suppress_size is specified, filter the boxes based on their size and position
- const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && ctr_x < info.im_width() && ctr_y < info.im_height());
- if(ovr <= info.nms() && keep_size)
+ const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() &&
+ ctr_x < info.im_width() && ctr_y < info.im_height());
+ if (ovr <= info.nms() && keep_size)
{
new_indices.push_back(j);
}
@@ -172,7 +181,7 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int>
const unsigned int new_indices_size = new_indices.size();
std::vector<int> new_sorted_indices(new_indices_size);
- for(unsigned int i = 0; i < new_indices_size; ++i)
+ for (unsigned int i = 0; i < new_indices_size; ++i)
{
new_sorted_indices[i] = sorted_indices[new_indices[i] + 1];
}
@@ -184,7 +193,15 @@ std::vector<int> NonMaximaSuppression(const ITensor *proposals, std::vector<int>
} // namespace
CPPBoxWithNonMaximaSuppressionLimitKernel::CPPBoxWithNonMaximaSuppressionLimitKernel()
- : _scores_in(nullptr), _boxes_in(nullptr), _batch_splits_in(nullptr), _scores_out(nullptr), _boxes_out(nullptr), _classes(nullptr), _batch_splits_out(nullptr), _keeps(nullptr), _keeps_size(nullptr),
+ : _scores_in(nullptr),
+ _boxes_in(nullptr),
+ _batch_splits_in(nullptr),
+ _scores_out(nullptr),
+ _boxes_out(nullptr),
+ _classes(nullptr),
+ _batch_splits_out(nullptr),
+ _keeps(nullptr),
+ _keeps_size(nullptr),
_info()
{
}
@@ -197,7 +214,7 @@ bool CPPBoxWithNonMaximaSuppressionLimitKernel::is_parallelisable() const
template <typename T>
void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
{
- const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0);
+ const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0);
const int num_classes = _scores_in->info()->dimension(0);
const int scores_count = _scores_in->info()->dimension(1);
std::vector<int> total_keep_per_batch(batch_size);
@@ -205,51 +222,48 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
int total_keep_count = 0;
std::vector<std::vector<T>> in_scores(num_classes, std::vector<T>(scores_count));
- for(int i = 0; i < scores_count; ++i)
+ for (int i = 0; i < scores_count; ++i)
{
- for(int j = 0; j < num_classes; ++j)
+ for (int j = 0; j < num_classes; ++j)
{
in_scores[j][i] = *reinterpret_cast<const T *>(_scores_in->ptr_to_element(Coordinates(j, i)));
}
}
int cur_start_idx = 0;
- for(int b = 0; b < batch_size; ++b)
+ for (int b = 0; b < batch_size; ++b)
{
// Skip first class if there is more than 1 except if the number of classes is 1.
const int j_start = (num_classes == 1 ? 0 : 1);
- for(int j = j_start; j < num_classes; ++j)
+ for (int j = j_start; j < num_classes; ++j)
{
std::vector<T> cur_scores(scores_count);
std::vector<int> inds;
- for(int i = 0; i < scores_count; ++i)
+ for (int i = 0; i < scores_count; ++i)
{
const T score = in_scores[j][i];
cur_scores[i] = score;
- if(score > _info.score_thresh())
+ if (score > _info.score_thresh())
{
inds.push_back(i);
}
}
- if(_info.soft_nms_enabled())
+ if (_info.soft_nms_enabled())
{
keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j);
}
else
{
std::sort(inds.data(), inds.data() + inds.size(),
- [&cur_scores](int lhs, int rhs)
- {
- return cur_scores[lhs] > cur_scores[rhs];
- });
+ [&cur_scores](int lhs, int rhs) { return cur_scores[lhs] > cur_scores[rhs]; });
keeps[j] = NonMaximaSuppression<T>(_boxes_in, inds, _info, j);
}
total_keep_count += keeps[j].size();
}
- if(_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im())
+ if (_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im())
{
// merge all scores (represented by indices) together and sort
auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]()
@@ -257,10 +271,10 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
std::vector<T> ret(total_keep_count);
int ret_idx = 0;
- for(unsigned int i = 1; i < keeps.size(); ++i)
+ for (unsigned int i = 1; i < keeps.size(); ++i)
{
auto &cur_keep = keeps[i];
- for(auto &ckv : cur_keep)
+ for (auto &ckv : cur_keep)
{
ret[ret_idx++] = in_scores[i][ckv];
}
@@ -273,13 +287,13 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
auto all_scores_sorted = get_all_scores_sorted();
const T image_thresh = all_scores_sorted[all_scores_sorted.size() - _info.detections_per_im()];
- for(int j = 1; j < num_classes; ++j)
+ for (int j = 1; j < num_classes; ++j)
{
auto &cur_keep = keeps[j];
std::vector<int> new_keeps_j;
- for(auto &k : cur_keep)
+ for (auto &k : cur_keep)
{
- if(in_scores[j][k] >= image_thresh)
+ if (in_scores[j][k] >= image_thresh)
{
new_keeps_j.push_back(k);
}
@@ -293,40 +307,52 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
// Write results
int cur_out_idx = 0;
- for(int j = j_start; j < num_classes; ++j)
+ for (int j = j_start; j < num_classes; ++j)
{
- auto &cur_keep = keeps[j];
- auto cur_out_scores = reinterpret_cast<T *>(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
- auto cur_out_classes = reinterpret_cast<T *>(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
- const int box_column = (cur_start_idx + cur_out_idx) * 4;
-
- for(unsigned int k = 0; k < cur_keep.size(); ++k)
+ auto &cur_keep = keeps[j];
+ auto cur_out_scores =
+ reinterpret_cast<T *>(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
+ auto cur_out_classes =
+ reinterpret_cast<T *>(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx)));
+ const int box_column = (cur_start_idx + cur_out_idx) * 4;
+
+ for (unsigned int k = 0; k < cur_keep.size(); ++k)
{
- cur_out_scores[k] = in_scores[j][cur_keep[k]];
- cur_out_classes[k] = static_cast<T>(j);
- auto cur_out_box_row0 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k)));
- auto cur_out_box_row1 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k)));
- auto cur_out_box_row2 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k)));
- auto cur_out_box_row3 = reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k)));
- *cur_out_box_row0 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k])));
- *cur_out_box_row1 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k])));
- *cur_out_box_row2 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k])));
- *cur_out_box_row3 = *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k])));
+ cur_out_scores[k] = in_scores[j][cur_keep[k]];
+ cur_out_classes[k] = static_cast<T>(j);
+ auto cur_out_box_row0 =
+ reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k)));
+ auto cur_out_box_row1 =
+ reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k)));
+ auto cur_out_box_row2 =
+ reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k)));
+ auto cur_out_box_row3 =
+ reinterpret_cast<T *>(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k)));
+ *cur_out_box_row0 =
+ *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k])));
+ *cur_out_box_row1 =
+ *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k])));
+ *cur_out_box_row2 =
+ *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k])));
+ *cur_out_box_row3 =
+ *reinterpret_cast<const T *>(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k])));
}
cur_out_idx += cur_keep.size();
}
- if(_keeps != nullptr)
+ if (_keeps != nullptr)
{
cur_out_idx = 0;
- for(int j = 0; j < num_classes; ++j)
+ for (int j = 0; j < num_classes; ++j)
{
- for(unsigned int i = 0; i < keeps[j].size(); ++i)
+ for (unsigned int i = 0; i < keeps[j].size(); ++i)
{
- *reinterpret_cast<T *>(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast<T>(keeps[j].at(i));
+ *reinterpret_cast<T *>(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) =
+ static_cast<T>(keeps[j].at(i));
}
- *reinterpret_cast<uint32_t *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = keeps[j].size();
+ *reinterpret_cast<uint32_t *>(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) =
+ keeps[j].size();
cur_out_idx += keeps[j].size();
}
}
@@ -334,17 +360,25 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit()
cur_start_idx += total_keep_count;
}
- if(_batch_splits_out != nullptr)
+ if (_batch_splits_out != nullptr)
{
- for(int b = 0; b < batch_size; ++b)
+ for (int b = 0; b < batch_size; ++b)
{
*reinterpret_cast<float *>(_batch_splits_out->ptr_to_element(Coordinates(b))) = total_keep_per_batch[b];
}
}
}
-void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes,
- ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info)
+void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in,
+ const ITensor *boxes_in,
+ const ITensor *batch_splits_in,
+ ITensor *scores_out,
+ ITensor *boxes_out,
+ ITensor *classes,
+ ITensor *batch_splits_out,
+ ITensor *keeps,
+ ITensor *keeps_size,
+ const BoxNMSLimitInfo info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32);
@@ -352,25 +386,28 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_
const unsigned int num_classes = scores_in->info()->dimension(0);
ARM_COMPUTE_UNUSED(num_classes);
- ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes");
- ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows");
+ ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0),
+ "First dimension of input boxes must be of size 4*num_classes");
+ ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1),
+ "Input scores and input boxes must have the same number of rows");
ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1));
ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4);
ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != classes->info()->dimension(0));
- if(keeps != nullptr)
+ if (keeps != nullptr)
{
- ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output");
+ ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr,
+ "keeps_size cannot be nullptr if keeps has to be provided as output");
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps);
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keeps_size, 1, DataType::U32);
ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0));
ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0));
}
- if(batch_splits_in != nullptr)
+ if (batch_splits_in != nullptr)
{
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_in);
}
- if(batch_splits_out != nullptr)
+ if (batch_splits_out != nullptr)
{
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_out);
}
@@ -399,7 +436,7 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run(const Window &window, const
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window);
- switch(_scores_in->info()->data_type())
+ switch (_scores_in->info()->data_type())
{
case DataType::F32:
run_nmslimit<float>();
diff --git a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
index c1187ff2b3..1224ec14a7 100644
--- a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
+++ b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp
@@ -35,15 +35,22 @@ namespace arm_compute
{
namespace
{
-Status validate_arguments(const ITensorInfo *bboxes, const ITensorInfo *scores, const ITensorInfo *output_indices, unsigned int max_output_size,
- const float score_threshold, const float iou_threshold)
+Status validate_arguments(const ITensorInfo *bboxes,
+ const ITensorInfo *scores,
+ const ITensorInfo *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(bboxes, scores, output_indices);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bboxes, 1, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_indices, 1, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(bboxes->num_dimensions() > 2, "The bboxes tensor must be a 2-D float tensor of shape [4, num_boxes].");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(scores->num_dimensions() > 1, "The scores tensor must be a 1-D float tensor of shape [num_boxes].");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_indices->num_dimensions() > 1, "The indices must be 1-D integer tensor of shape [M], where max_output_size <= M");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bboxes->num_dimensions() > 2,
+ "The bboxes tensor must be a 2-D float tensor of shape [4, num_boxes].");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(scores->num_dimensions() > 1,
+ "The scores tensor must be a 1-D float tensor of shape [num_boxes].");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_indices->num_dimensions() > 1,
+ "The indices must be 1-D integer tensor of shape [M], where max_output_size <= M");
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(bboxes, scores);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_indices->dimension(0) == 0, "Indices tensor must be bigger than 0");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(max_output_size == 0, "Max size cannot be 0");
@@ -55,15 +62,26 @@ Status validate_arguments(const ITensorInfo *bboxes, const ITensorInfo *scores,
} // namespace
CPPNonMaximumSuppressionKernel::CPPNonMaximumSuppressionKernel()
- : _input_bboxes(nullptr), _input_scores(nullptr), _output_indices(nullptr), _max_output_size(0), _score_threshold(0.f), _iou_threshold(0.f), _num_boxes(0)
+ : _input_bboxes(nullptr),
+ _input_scores(nullptr),
+ _output_indices(nullptr),
+ _max_output_size(0),
+ _score_threshold(0.f),
+ _iou_threshold(0.f),
+ _num_boxes(0)
{
}
-void CPPNonMaximumSuppressionKernel::configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices,
- unsigned int max_output_size, const float score_threshold, const float iou_threshold)
+void CPPNonMaximumSuppressionKernel::configure(const ITensor *input_bboxes,
+ const ITensor *input_scores,
+ ITensor *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input_bboxes, input_scores, output_indices);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_bboxes->info(), input_scores->info(), output_indices->info(), max_output_size, score_threshold, iou_threshold));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_bboxes->info(), input_scores->info(), output_indices->info(),
+ max_output_size, score_threshold, iou_threshold));
auto_init_if_empty(*output_indices->info(), TensorShape(max_output_size), 1, DataType::U8, QuantizationInfo());
@@ -82,10 +100,15 @@ void CPPNonMaximumSuppressionKernel::configure(const ITensor *input_bboxes, cons
ICPPKernel::configure(win);
}
-Status CPPNonMaximumSuppressionKernel::validate(const ITensorInfo *bboxes, const ITensorInfo *scores, const ITensorInfo *output_indices,
- unsigned int max_output_size, const float score_threshold, const float iou_threshold)
+Status CPPNonMaximumSuppressionKernel::validate(const ITensorInfo *bboxes,
+ const ITensorInfo *scores,
+ const ITensorInfo *output_indices,
+ unsigned int max_output_size,
+ const float score_threshold,
+ const float iou_threshold)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(bboxes, scores, output_indices, max_output_size, score_threshold, iou_threshold));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ validate_arguments(bboxes, scores, output_indices, max_output_size, score_threshold, iou_threshold));
return Status{};
}
@@ -99,10 +122,10 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
// Auxiliary tensors
std::vector<int> indices_above_thd;
std::vector<float> scores_above_thd;
- for(unsigned int i = 0; i < _num_boxes; ++i)
+ for (unsigned int i = 0; i < _num_boxes; ++i)
{
const float score_i = *(reinterpret_cast<float *>(_input_scores->ptr_to_element(Coordinates(i))));
- if(score_i >= _score_threshold)
+ if (score_i >= _score_threshold)
{
scores_above_thd.emplace_back(score_i);
indices_above_thd.emplace_back(i);
@@ -114,12 +137,9 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
std::vector<unsigned int> sorted_indices;
sorted_indices.resize(num_above_thd);
std::iota(sorted_indices.data(), sorted_indices.data() + num_above_thd, 0);
- std::sort(std::begin(sorted_indices),
- std::end(sorted_indices),
+ std::sort(std::begin(sorted_indices), std::end(sorted_indices),
[&](unsigned int first, unsigned int second)
- {
- return scores_above_thd[first] > scores_above_thd[second];
- });
+ { return scores_above_thd[first] > scores_above_thd[second]; });
// Number of output is the minimum between max_detection and the scores above the threshold
const unsigned int num_output = std::min(_max_output_size, num_above_thd);
@@ -127,19 +147,20 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
std::vector<bool> visited(num_above_thd, false);
// Keep only boxes with small IoU
- for(unsigned int i = 0; i < num_above_thd; ++i)
+ for (unsigned int i = 0; i < num_above_thd; ++i)
{
// Check if the output is full
- if(output_idx >= num_output)
+ if (output_idx >= num_output)
{
break;
}
// Check if it was already visited, if not add it to the output and update the indices counter
- if(!visited[sorted_indices[i]])
+ if (!visited[sorted_indices[i]])
{
- *(reinterpret_cast<int *>(_output_indices->ptr_to_element(Coordinates(output_idx)))) = indices_above_thd[sorted_indices[i]];
- visited[sorted_indices[i]] = true;
+ *(reinterpret_cast<int *>(_output_indices->ptr_to_element(Coordinates(output_idx)))) =
+ indices_above_thd[sorted_indices[i]];
+ visited[sorted_indices[i]] = true;
++output_idx;
}
else
@@ -148,28 +169,36 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
}
// Once added one element at the output check if the next ones overlap and can be skipped
- for(unsigned int j = i + 1; j < num_above_thd; ++j)
+ for (unsigned int j = i + 1; j < num_above_thd; ++j)
{
- if(!visited[sorted_indices[j]])
+ if (!visited[sorted_indices[j]])
{
// Calculate IoU
const unsigned int i_index = indices_above_thd[sorted_indices[i]];
const unsigned int j_index = indices_above_thd[sorted_indices[j]];
// Box-corner format: xmin, ymin, xmax, ymax
- const auto box_i_xmin = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(0, i_index))));
- const auto box_i_ymin = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(1, i_index))));
- const auto box_i_xmax = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(2, i_index))));
- const auto box_i_ymax = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(3, i_index))));
-
- const auto box_j_xmin = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(0, j_index))));
- const auto box_j_ymin = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(1, j_index))));
- const auto box_j_xmax = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(2, j_index))));
- const auto box_j_ymax = *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(3, j_index))));
+ const auto box_i_xmin =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(0, i_index))));
+ const auto box_i_ymin =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(1, i_index))));
+ const auto box_i_xmax =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(2, i_index))));
+ const auto box_i_ymax =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(3, i_index))));
+
+ const auto box_j_xmin =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(0, j_index))));
+ const auto box_j_ymin =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(1, j_index))));
+ const auto box_j_xmax =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(2, j_index))));
+ const auto box_j_ymax =
+ *(reinterpret_cast<float *>(_input_bboxes->ptr_to_element(Coordinates(3, j_index))));
const float area_i = (box_i_xmax - box_i_xmin) * (box_i_ymax - box_i_ymin);
const float area_j = (box_j_xmax - box_j_xmin) * (box_j_ymax - box_j_ymin);
float overlap;
- if(area_i <= 0 || area_j <= 0)
+ if (area_i <= 0 || area_j <= 0)
{
overlap = 0.0f;
}
@@ -179,11 +208,12 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
const auto x_min_intersection = std::max<float>(box_i_xmin, box_j_xmin);
const auto y_max_intersection = std::min<float>(box_i_ymax, box_j_ymax);
const auto x_max_intersection = std::min<float>(box_i_xmax, box_j_xmax);
- const auto area_intersection = std::max<float>(y_max_intersection - y_min_intersection, 0.0f) * std::max<float>(x_max_intersection - x_min_intersection, 0.0f);
- overlap = area_intersection / (area_i + area_j - area_intersection);
+ const auto area_intersection = std::max<float>(y_max_intersection - y_min_intersection, 0.0f) *
+ std::max<float>(x_max_intersection - x_min_intersection, 0.0f);
+ overlap = area_intersection / (area_i + area_j - area_intersection);
}
- if(overlap > _iou_threshold)
+ if (overlap > _iou_threshold)
{
visited[sorted_indices[j]] = true;
}
@@ -192,7 +222,7 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo
}
// The output could be full but not the output indices tensor
// Instead return values not valid we put -1
- for(; output_idx < _max_output_size; ++output_idx)
+ for (; output_idx < _max_output_size; ++output_idx)
{
*(reinterpret_cast<int *>(_output_indices->ptr_to_element(Coordinates(output_idx)))) = -1;
}
diff --git a/src/core/CPP/kernels/CPPPermuteKernel.cpp b/src/core/CPP/kernels/CPPPermuteKernel.cpp
index 054c7bf05a..e68090d82b 100644
--- a/src/core/CPP/kernels/CPPPermuteKernel.cpp
+++ b/src/core/CPP/kernels/CPPPermuteKernel.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
@@ -43,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm);
// Validate configured output
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
@@ -65,7 +66,7 @@ void CPPPermuteKernel::run_permute(const Window &window)
// Create output window
Window window_out(window);
const Window::Dimension zero_window = Window::Dimension(0, 0, 0);
- for(size_t d = 0; d <= _perm.num_dimensions(); ++d)
+ for (size_t d = 0; d <= _perm.num_dimensions(); ++d)
{
window_out.set(d, zero_window);
}
@@ -74,28 +75,32 @@ void CPPPermuteKernel::run_permute(const Window &window)
Iterator in(_input, window);
Iterator out(_output, window_out);
- if(_input->info()->num_dimensions() <= 3)
+ if (_input->info()->num_dimensions() <= 3)
{
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2];
- *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
- },
- in, out);
+ execute_window_loop(
+ window,
+ [&](const Coordinates &id)
+ {
+ const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2];
+ *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
+ },
+ in, out);
}
- else if(_input->info()->num_dimensions() >= 4)
+ else if (_input->info()->num_dimensions() >= 4)
{
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2] + id[3] * perm_strides[3];
- *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
- },
- in, out);
+ execute_window_loop(
+ window,
+ [&](const Coordinates &id)
+ {
+ const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2] +
+ id[3] * perm_strides[3];
+ *(reinterpret_cast<T *>(out.ptr() + idx)) = *(reinterpret_cast<const T *>(in.ptr()));
+ },
+ in, out);
}
}
-CPPPermuteKernel::CPPPermuteKernel()
- : _func(), _input(nullptr), _output(nullptr), _perm()
+CPPPermuteKernel::CPPPermuteKernel() : _func(), _input(nullptr), _output(nullptr), _perm()
{
}
@@ -113,7 +118,7 @@ void CPPPermuteKernel::configure(const ITensor *input, ITensor *output, const Pe
_output = output;
_perm = perm;
- switch(input->info()->element_size())
+ switch (input->info()->element_size())
{
case 1:
_func = &CPPPermuteKernel::run_permute<uint8_t>;
@@ -152,7 +157,7 @@ void CPPPermuteKernel::run(const Window &window, const ThreadInfo &info)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
- if(_func != nullptr)
+ if (_func != nullptr)
{
(this->*_func)(window);
}
diff --git a/src/core/CPP/kernels/CPPTopKVKernel.cpp b/src/core/CPP/kernels/CPPTopKVKernel.cpp
index d2b54e412e..6ffb68e770 100644
--- a/src/core/CPP/kernels/CPPTopKVKernel.cpp
+++ b/src/core/CPP/kernels/CPPTopKVKernel.cpp
@@ -34,32 +34,34 @@ namespace arm_compute
{
namespace
{
-template <typename T,
- typename std::enable_if<utils::traits::is_floating_point<T>::value, int>::type = 0>
+template <typename T, typename std::enable_if<utils::traits::is_floating_point<T>::value, int>::type = 0>
inline bool greater_than(T a, T b)
{
const T epsilon = std::numeric_limits<T>::epsilon();
return (a - b > epsilon);
}
-template < typename T,
- typename std::enable_if < !utils::traits::is_floating_point<T>::value, int >::type = 0 >
+template <typename T, typename std::enable_if<!utils::traits::is_floating_point<T>::value, int>::type = 0>
inline bool greater_than(T a, T b)
{
return (a > b);
}
-Status validate_arguments(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k)
+Status validate_arguments(const ITensorInfo *predictions,
+ const ITensorInfo *targets,
+ ITensorInfo *output,
+ const unsigned int k)
{
ARM_COMPUTE_UNUSED(k);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(predictions, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(predictions, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::S32, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(targets, 1, DataType::U32);
ARM_COMPUTE_RETURN_ERROR_ON(predictions->num_dimensions() > 2);
ARM_COMPUTE_RETURN_ERROR_ON(targets->num_dimensions() > 1);
ARM_COMPUTE_RETURN_ERROR_ON(targets->dimension(0) != predictions->dimension(1));
// Validate configured output
- if(output->total_size() != 0)
+ if (output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), targets->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8);
@@ -72,22 +74,23 @@ Status validate_arguments(const ITensorInfo *predictions, const ITensorInfo *tar
template <typename T>
void CPPTopKVKernel::run_topkv()
{
- for(unsigned int i = 0; i < _batch_size; ++i)
+ for (unsigned int i = 0; i < _batch_size; ++i)
{
- const auto target_class_id = *reinterpret_cast<uint32_t *>(_targets->ptr_to_element(Coordinates{ i }));
- const auto predicted_value = *reinterpret_cast<T *>(_predictions->ptr_to_element(Coordinates{ target_class_id, i }));
+ const auto target_class_id = *reinterpret_cast<uint32_t *>(_targets->ptr_to_element(Coordinates{i}));
+ const auto predicted_value =
+ *reinterpret_cast<T *>(_predictions->ptr_to_element(Coordinates{target_class_id, i}));
// The variable rank indicates how many values there are before the target_class_id
unsigned int rank = 0;
- for(unsigned int j = 0; (j < _num_classes) && (rank < _k); ++j)
+ for (unsigned int j = 0; (j < _num_classes) && (rank < _k); ++j)
{
- const auto current_prediction = *reinterpret_cast<T *>(_predictions->ptr_to_element(Coordinates{ j, i }));
- if(greater_than(current_prediction, predicted_value))
+ const auto current_prediction = *reinterpret_cast<T *>(_predictions->ptr_to_element(Coordinates{j, i}));
+ if (greater_than(current_prediction, predicted_value))
{
rank++;
}
}
- *(_output->ptr_to_element(Coordinates{ i })) = static_cast<uint8_t>(rank < _k);
+ *(_output->ptr_to_element(Coordinates{i})) = static_cast<uint8_t>(rank < _k);
}
}
@@ -96,7 +99,10 @@ CPPTopKVKernel::CPPTopKVKernel()
{
}
-void CPPTopKVKernel::configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k)
+void CPPTopKVKernel::configure(const ITensor *predictions,
+ const ITensor *targets,
+ ITensor *output,
+ const unsigned int k)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(predictions, targets, output);
@@ -115,7 +121,10 @@ void CPPTopKVKernel::configure(const ITensor *predictions, const ITensor *target
ICPPKernel::configure(Window()); // Default 1 iteration window
}
-Status CPPTopKVKernel::validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k)
+Status CPPTopKVKernel::validate(const ITensorInfo *predictions,
+ const ITensorInfo *targets,
+ ITensorInfo *output,
+ const unsigned int k)
{
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(predictions, targets, output, k));
return Status{};
@@ -129,7 +138,7 @@ bool CPPTopKVKernel::is_parallelisable() const
void CPPTopKVKernel::run(const Window &window, const ThreadInfo &info)
{
ARM_COMPUTE_UNUSED(window, info);
- switch(_predictions->info()->data_type())
+ switch (_predictions->info()->data_type())
{
case DataType::F32:
run_topkv<float>();
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index 7ef83fb2c4..b1efe32446 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -24,6 +24,7 @@
#include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h"
#include "arm_compute/core/Helpers.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include <cstddef>
@@ -31,8 +32,7 @@
namespace arm_compute
{
-CPPUpsampleKernel::CPPUpsampleKernel()
- : _input(nullptr), _output(nullptr), _info()
+CPPUpsampleKernel::CPPUpsampleKernel() : _input(nullptr), _output(nullptr), _info()
{
}
@@ -82,7 +82,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
const size_t element_size = _input->info()->element_size();
// The fill value is normally 0, but for quantized types '0' corresponds to the offset
- switch(_output->info()->data_type())
+ switch (_output->info()->data_type())
{
case DataType::QASYMM8:
{
@@ -102,7 +102,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
// Create window
Window window_out(window);
- if(data_layout == DataLayout::NCHW)
+ if (data_layout == DataLayout::NCHW)
{
window_out.set(Window::DimX, Window::Dimension(start_width, end_width, stride_width));
window_out.set(Window::DimY, Window::Dimension(start_height, end_height, stride_height));
@@ -117,10 +117,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
Iterator in(_input, window);
Iterator out(_output, window_out);
- execute_window_loop(window, [&](const Coordinates &)
- {
- memcpy(out.ptr(), in.ptr(), element_size);
- },
- in, out);
+ execute_window_loop(
+ window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out);
}
-} // namespace arm_compute
\ No newline at end of file
+} // namespace arm_compute