From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001 From: Felix Thomasmathibalan Date: Wed, 27 Sep 2023 17:46:17 +0100 Subject: Apply clang-format on repository Code is formatted as per a revised clang format configuration file(not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...) And the following directories - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir --- src/core/CPP/CPPTypes.cpp | 4 +- src/core/CPP/Validate.h | 26 ++-- .../CPPBoxWithNonMaximaSuppressionLimitKernel.cpp | 171 +++++++++++++-------- .../CPP/kernels/CPPNonMaximumSuppressionKernel.cpp | 110 ++++++++----- src/core/CPP/kernels/CPPPermuteKernel.cpp | 45 +++--- src/core/CPP/kernels/CPPTopKVKernel.cpp | 43 ++++-- src/core/CPP/kernels/CPPUpsampleKernel.cpp | 17 +- 7 files changed, 248 insertions(+), 168 deletions(-) (limited to 'src/core/CPP') diff --git a/src/core/CPP/CPPTypes.cpp b/src/core/CPP/CPPTypes.cpp index 6a3f66fd5a..9980db42f3 100644 --- a/src/core/CPP/CPPTypes.cpp +++ b/src/core/CPP/CPPTypes.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/CPP/CPPTypes.h" #include "arm_compute/core/Error.h" + #include "src/common/cpuinfo/CpuInfo.h" #include "src/common/cpuinfo/CpuIsaInfo.h" @@ -43,8 +44,7 @@ CPUInfo &CPUInfo::get() return _cpuinfo; } -CPUInfo::CPUInfo() - : _impl(std::make_unique()) +CPUInfo::CPUInfo() : _impl(std::make_unique()) { _impl->info = cpuinfo::CpuInfo::build(); } diff --git a/src/core/CPP/Validate.h b/src/core/CPP/Validate.h index df192b5131..fe253508cf 100644 --- a/src/core/CPP/Validate.h +++ b/src/core/CPP/Validate.h @@ -38,8 +38,8 @@ namespace arm_compute * * @return Status */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) +inline Status +error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, const ITensorInfo *tensor_info) { bool fp16_kernels_enabled = false; #if defined(ARM_COMPUTE_ENABLE_FP16) && defined(ENABLE_FP16_KERNELS) @@ -47,8 +47,9 @@ inline Status error_on_unsupported_cpu_fp16(const char *function, const char *fi #endif /* defined(ARM_COMPUTE_ENABLE_FP16) && defined(ENABLE_FP16_KERNELS) */ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::F16) && (!CPUInfo::get().has_fp16() || !fp16_kernels_enabled), - function, file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + (tensor_info->data_type() == DataType::F16) && (!CPUInfo::get().has_fp16() || !fp16_kernels_enabled), function, + file, line, "This CPU architecture does not support F16 data type, you need v8.2 or above"); return Status{}; } @@ -61,8 +62,8 @@ inline Status error_on_unsupported_cpu_fp16(const char *function, const char *fi * * @return Status */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensorInfo *tensor_info) +inline Status +error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, const ITensorInfo *tensor_info) { bool bf16_kernels_enabled = false; #if defined(ARM_COMPUTE_ENABLE_BF16) @@ -70,8 +71,9 @@ inline Status error_on_unsupported_cpu_bf16(const char *function, const char *fi #endif /* defined(ARM_COMPUTE_ENABLE_BF16) */ ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor_info == nullptr, function, file, line); - ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG((tensor_info->data_type() == DataType::BFLOAT16) && (!CPUInfo::get().has_bf16() || !bf16_kernels_enabled), - function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above"); + ARM_COMPUTE_RETURN_ERROR_ON_LOC_MSG( + (tensor_info->data_type() == DataType::BFLOAT16) && (!CPUInfo::get().has_bf16() || !bf16_kernels_enabled), + function, file, line, "This CPU architecture does not support BFloat16 data type, you need v8.6 or above"); return Status{}; } @@ -84,8 +86,8 @@ inline Status error_on_unsupported_cpu_bf16(const char *function, const char *fi * * @return Status */ -inline Status error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, - const ITensor *tensor) +inline Status +error_on_unsupported_cpu_fp16(const char *function, const char *file, const int line, const ITensor *tensor) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_fp16(function, file, line, tensor->info())); @@ -101,8 +103,8 @@ inline Status error_on_unsupported_cpu_fp16(const char *function, const char *fi * * @return Status */ -inline Status error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, - const ITensor *tensor) +inline Status +error_on_unsupported_cpu_bf16(const char *function, const char *file, const int line, const ITensor *tensor) { ARM_COMPUTE_RETURN_ERROR_ON_LOC(tensor == nullptr, function, file, line); ARM_COMPUTE_RETURN_ON_ERROR(::arm_compute::error_on_unsupported_cpu_bf16(function, file, line, tensor->info())); diff --git a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp index 0f405d8e83..02686eb4f6 100644 --- a/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp +++ b/src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" #include "arm_compute/core/Helpers.h" + #include "src/core/helpers/WindowHelpers.h" #include @@ -34,7 +35,11 @@ namespace arm_compute namespace { template -std::vector SoftNMS(const ITensor *proposals, std::vector> &scores_in, std::vector inds, const BoxNMSLimitInfo &info, int class_id) +std::vector SoftNMS(const ITensor *proposals, + std::vector> &scores_in, + std::vector inds, + const BoxNMSLimitInfo &info, + int class_id) { std::vector keep; const int proposals_width = proposals->info()->dimension(1); @@ -45,7 +50,7 @@ std::vector SoftNMS(const ITensor *proposals, std::vector> & std::vector y2(proposals_width); std::vector areas(proposals_width); - for(int i = 0; i < proposals_width; ++i) + for (int i = 0; i < proposals_width; ++i) { x1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4, i))); y1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i))); @@ -56,13 +61,13 @@ std::vector SoftNMS(const ITensor *proposals, std::vector> & // Note: Soft NMS scores have already been initialized with input scores - while(!inds.empty()) + while (!inds.empty()) { // Find proposal with max score among remaining proposals int max_pos = 0; - for(unsigned int i = 1; i < inds.size(); ++i) + for (unsigned int i = 1; i < inds.size(); ++i) { - if(scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)]) + if (scores_in[class_id][inds.at(i)] > scores_in[class_id][inds.at(max_pos)]) { max_pos = i; } @@ -75,7 +80,7 @@ std::vector SoftNMS(const ITensor *proposals, std::vector> & inds.erase(inds.begin()); std::vector sorted_indices_temp; - for(auto idx : inds) + for (auto idx : inds) { const auto xx1 = std::max(x1[idx], x1[element]); const auto yy1 = std::max(y1[idx], y1[element]); @@ -89,7 +94,7 @@ std::vector SoftNMS(const ITensor *proposals, std::vector> & // Update scores based on computed IoU, overlap threshold and NMS method T weight; - switch(info.soft_nms_method()) + switch (info.soft_nms_method()) { case NMSType::LINEAR: weight = (ovr > info.nms()) ? (1.f - ovr) : 1.f; @@ -106,7 +111,7 @@ std::vector SoftNMS(const ITensor *proposals, std::vector> & // Discard boxes with new scores below min threshold and update pending indices scores_in[class_id][idx] *= weight; - if(scores_in[class_id][idx] >= info.soft_nms_min_score_thres()) + if (scores_in[class_id][idx] >= info.soft_nms_min_score_thres()) { sorted_indices_temp.push_back(idx); } @@ -118,7 +123,10 @@ std::vector SoftNMS(const ITensor *proposals, std::vector> & } template -std::vector NonMaximaSuppression(const ITensor *proposals, std::vector sorted_indices, const BoxNMSLimitInfo &info, int class_id) +std::vector NonMaximaSuppression(const ITensor *proposals, + std::vector sorted_indices, + const BoxNMSLimitInfo &info, + int class_id) { std::vector keep; @@ -130,7 +138,7 @@ std::vector NonMaximaSuppression(const ITensor *proposals, std::vector std::vector y2(proposals_width); std::vector areas(proposals_width); - for(int i = 0; i < proposals_width; ++i) + for (int i = 0; i < proposals_width; ++i) { x1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4, i))); y1[i] = *reinterpret_cast(proposals->ptr_to_element(Coordinates(class_id * 4 + 1, i))); @@ -139,7 +147,7 @@ std::vector NonMaximaSuppression(const ITensor *proposals, std::vector areas[i] = (x2[i] - x1[i] + 1.0) * (y2[i] - y1[i] + 1.0); } - while(!sorted_indices.empty()) + while (!sorted_indices.empty()) { int i = sorted_indices.at(0); keep.push_back(i); @@ -148,7 +156,7 @@ std::vector NonMaximaSuppression(const ITensor *proposals, std::vector std::vector new_indices; sorted_indices_temp.erase(sorted_indices_temp.begin()); - for(unsigned int j = 0; j < sorted_indices_temp.size(); ++j) + for (unsigned int j = 0; j < sorted_indices_temp.size(); ++j) { const float xx1 = std::max(x1[sorted_indices_temp.at(j)], x1[i]); const float yy1 = std::max(y1[sorted_indices_temp.at(j)], y1[i]); @@ -163,8 +171,9 @@ std::vector NonMaximaSuppression(const ITensor *proposals, std::vector const float ctr_y = yy1 + (h / 2); // If suppress_size is specified, filter the boxes based on their size and position - const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && ctr_x < info.im_width() && ctr_y < info.im_height()); - if(ovr <= info.nms() && keep_size) + const bool keep_size = !info.suppress_size() || (w >= info.min_size() && h >= info.min_size() && + ctr_x < info.im_width() && ctr_y < info.im_height()); + if (ovr <= info.nms() && keep_size) { new_indices.push_back(j); } @@ -172,7 +181,7 @@ std::vector NonMaximaSuppression(const ITensor *proposals, std::vector const unsigned int new_indices_size = new_indices.size(); std::vector new_sorted_indices(new_indices_size); - for(unsigned int i = 0; i < new_indices_size; ++i) + for (unsigned int i = 0; i < new_indices_size; ++i) { new_sorted_indices[i] = sorted_indices[new_indices[i] + 1]; } @@ -184,7 +193,15 @@ std::vector NonMaximaSuppression(const ITensor *proposals, std::vector } // namespace CPPBoxWithNonMaximaSuppressionLimitKernel::CPPBoxWithNonMaximaSuppressionLimitKernel() - : _scores_in(nullptr), _boxes_in(nullptr), _batch_splits_in(nullptr), _scores_out(nullptr), _boxes_out(nullptr), _classes(nullptr), _batch_splits_out(nullptr), _keeps(nullptr), _keeps_size(nullptr), + : _scores_in(nullptr), + _boxes_in(nullptr), + _batch_splits_in(nullptr), + _scores_out(nullptr), + _boxes_out(nullptr), + _classes(nullptr), + _batch_splits_out(nullptr), + _keeps(nullptr), + _keeps_size(nullptr), _info() { } @@ -197,7 +214,7 @@ bool CPPBoxWithNonMaximaSuppressionLimitKernel::is_parallelisable() const template void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() { - const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0); + const int batch_size = _batch_splits_in == nullptr ? 1 : _batch_splits_in->info()->dimension(0); const int num_classes = _scores_in->info()->dimension(0); const int scores_count = _scores_in->info()->dimension(1); std::vector total_keep_per_batch(batch_size); @@ -205,51 +222,48 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() int total_keep_count = 0; std::vector> in_scores(num_classes, std::vector(scores_count)); - for(int i = 0; i < scores_count; ++i) + for (int i = 0; i < scores_count; ++i) { - for(int j = 0; j < num_classes; ++j) + for (int j = 0; j < num_classes; ++j) { in_scores[j][i] = *reinterpret_cast(_scores_in->ptr_to_element(Coordinates(j, i))); } } int cur_start_idx = 0; - for(int b = 0; b < batch_size; ++b) + for (int b = 0; b < batch_size; ++b) { // Skip first class if there is more than 1 except if the number of classes is 1. const int j_start = (num_classes == 1 ? 0 : 1); - for(int j = j_start; j < num_classes; ++j) + for (int j = j_start; j < num_classes; ++j) { std::vector cur_scores(scores_count); std::vector inds; - for(int i = 0; i < scores_count; ++i) + for (int i = 0; i < scores_count; ++i) { const T score = in_scores[j][i]; cur_scores[i] = score; - if(score > _info.score_thresh()) + if (score > _info.score_thresh()) { inds.push_back(i); } } - if(_info.soft_nms_enabled()) + if (_info.soft_nms_enabled()) { keeps[j] = SoftNMS(_boxes_in, in_scores, inds, _info, j); } else { std::sort(inds.data(), inds.data() + inds.size(), - [&cur_scores](int lhs, int rhs) - { - return cur_scores[lhs] > cur_scores[rhs]; - }); + [&cur_scores](int lhs, int rhs) { return cur_scores[lhs] > cur_scores[rhs]; }); keeps[j] = NonMaximaSuppression(_boxes_in, inds, _info, j); } total_keep_count += keeps[j].size(); } - if(_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im()) + if (_info.detections_per_im() > 0 && total_keep_count > _info.detections_per_im()) { // merge all scores (represented by indices) together and sort auto get_all_scores_sorted = [&in_scores, &keeps, total_keep_count]() @@ -257,10 +271,10 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() std::vector ret(total_keep_count); int ret_idx = 0; - for(unsigned int i = 1; i < keeps.size(); ++i) + for (unsigned int i = 1; i < keeps.size(); ++i) { auto &cur_keep = keeps[i]; - for(auto &ckv : cur_keep) + for (auto &ckv : cur_keep) { ret[ret_idx++] = in_scores[i][ckv]; } @@ -273,13 +287,13 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() auto all_scores_sorted = get_all_scores_sorted(); const T image_thresh = all_scores_sorted[all_scores_sorted.size() - _info.detections_per_im()]; - for(int j = 1; j < num_classes; ++j) + for (int j = 1; j < num_classes; ++j) { auto &cur_keep = keeps[j]; std::vector new_keeps_j; - for(auto &k : cur_keep) + for (auto &k : cur_keep) { - if(in_scores[j][k] >= image_thresh) + if (in_scores[j][k] >= image_thresh) { new_keeps_j.push_back(k); } @@ -293,40 +307,52 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() // Write results int cur_out_idx = 0; - for(int j = j_start; j < num_classes; ++j) + for (int j = j_start; j < num_classes; ++j) { - auto &cur_keep = keeps[j]; - auto cur_out_scores = reinterpret_cast(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); - auto cur_out_classes = reinterpret_cast(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); - const int box_column = (cur_start_idx + cur_out_idx) * 4; - - for(unsigned int k = 0; k < cur_keep.size(); ++k) + auto &cur_keep = keeps[j]; + auto cur_out_scores = + reinterpret_cast(_scores_out->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); + auto cur_out_classes = + reinterpret_cast(_classes->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx))); + const int box_column = (cur_start_idx + cur_out_idx) * 4; + + for (unsigned int k = 0; k < cur_keep.size(); ++k) { - cur_out_scores[k] = in_scores[j][cur_keep[k]]; - cur_out_classes[k] = static_cast(j); - auto cur_out_box_row0 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k))); - auto cur_out_box_row1 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k))); - auto cur_out_box_row2 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k))); - auto cur_out_box_row3 = reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k))); - *cur_out_box_row0 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k]))); - *cur_out_box_row1 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k]))); - *cur_out_box_row2 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k]))); - *cur_out_box_row3 = *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k]))); + cur_out_scores[k] = in_scores[j][cur_keep[k]]; + cur_out_classes[k] = static_cast(j); + auto cur_out_box_row0 = + reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 0, k))); + auto cur_out_box_row1 = + reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 1, k))); + auto cur_out_box_row2 = + reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 2, k))); + auto cur_out_box_row3 = + reinterpret_cast(_boxes_out->ptr_to_element(Coordinates(box_column + 3, k))); + *cur_out_box_row0 = + *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 0, cur_keep[k]))); + *cur_out_box_row1 = + *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 1, cur_keep[k]))); + *cur_out_box_row2 = + *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 2, cur_keep[k]))); + *cur_out_box_row3 = + *reinterpret_cast(_boxes_in->ptr_to_element(Coordinates(j * 4 + 3, cur_keep[k]))); } cur_out_idx += cur_keep.size(); } - if(_keeps != nullptr) + if (_keeps != nullptr) { cur_out_idx = 0; - for(int j = 0; j < num_classes; ++j) + for (int j = 0; j < num_classes; ++j) { - for(unsigned int i = 0; i < keeps[j].size(); ++i) + for (unsigned int i = 0; i < keeps[j].size(); ++i) { - *reinterpret_cast(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = static_cast(keeps[j].at(i)); + *reinterpret_cast(_keeps->ptr_to_element(Coordinates(cur_start_idx + cur_out_idx + i))) = + static_cast(keeps[j].at(i)); } - *reinterpret_cast(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = keeps[j].size(); + *reinterpret_cast(_keeps_size->ptr_to_element(Coordinates(j + b * num_classes))) = + keeps[j].size(); cur_out_idx += keeps[j].size(); } } @@ -334,17 +360,25 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run_nmslimit() cur_start_idx += total_keep_count; } - if(_batch_splits_out != nullptr) + if (_batch_splits_out != nullptr) { - for(int b = 0; b < batch_size; ++b) + for (int b = 0; b < batch_size; ++b) { *reinterpret_cast(_batch_splits_out->ptr_to_element(Coordinates(b))) = total_keep_per_batch[b]; } } } -void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, const ITensor *boxes_in, const ITensor *batch_splits_in, ITensor *scores_out, ITensor *boxes_out, ITensor *classes, - ITensor *batch_splits_out, ITensor *keeps, ITensor *keeps_size, const BoxNMSLimitInfo info) +void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_in, + const ITensor *boxes_in, + const ITensor *batch_splits_in, + ITensor *scores_out, + ITensor *boxes_out, + ITensor *classes, + ITensor *batch_splits_out, + ITensor *keeps, + ITensor *keeps_size, + const BoxNMSLimitInfo info) { ARM_COMPUTE_ERROR_ON_NULLPTR(scores_in, boxes_in, scores_out, boxes_out, classes); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores_in, 1, DataType::F16, DataType::F32); @@ -352,25 +386,28 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::configure(const ITensor *scores_ const unsigned int num_classes = scores_in->info()->dimension(0); ARM_COMPUTE_UNUSED(num_classes); - ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), "First dimension of input boxes must be of size 4*num_classes"); - ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), "Input scores and input boxes must have the same number of rows"); + ARM_COMPUTE_ERROR_ON_MSG((4 * num_classes) != boxes_in->info()->dimension(0), + "First dimension of input boxes must be of size 4*num_classes"); + ARM_COMPUTE_ERROR_ON_MSG(scores_in->info()->dimension(1) != boxes_in->info()->dimension(1), + "Input scores and input boxes must have the same number of rows"); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != boxes_out->info()->dimension(1)); ARM_COMPUTE_ERROR_ON(boxes_out->info()->dimension(0) != 4); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != classes->info()->dimension(0)); - if(keeps != nullptr) + if (keeps != nullptr) { - ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, "keeps_size cannot be nullptr if keeps has to be provided as output"); + ARM_COMPUTE_ERROR_ON_MSG(keeps_size == nullptr, + "keeps_size cannot be nullptr if keeps has to be provided as output"); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, keeps); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(keeps_size, 1, DataType::U32); ARM_COMPUTE_ERROR_ON(scores_out->info()->dimension(0) != keeps->info()->dimension(0)); ARM_COMPUTE_ERROR_ON(num_classes != keeps_size->info()->dimension(0)); } - if(batch_splits_in != nullptr) + if (batch_splits_in != nullptr) { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_in); } - if(batch_splits_out != nullptr) + if (batch_splits_out != nullptr) { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(scores_in, batch_splits_out); } @@ -399,7 +436,7 @@ void CPPBoxWithNonMaximaSuppressionLimitKernel::run(const Window &window, const ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IKernel::window(), window); - switch(_scores_in->info()->data_type()) + switch (_scores_in->info()->data_type()) { case DataType::F32: run_nmslimit(); diff --git a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp index c1187ff2b3..1224ec14a7 100644 --- a/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp +++ b/src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp @@ -35,15 +35,22 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo *bboxes, const ITensorInfo *scores, const ITensorInfo *output_indices, unsigned int max_output_size, - const float score_threshold, const float iou_threshold) +Status validate_arguments(const ITensorInfo *bboxes, + const ITensorInfo *scores, + const ITensorInfo *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(bboxes, scores, output_indices); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bboxes, 1, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output_indices, 1, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(bboxes->num_dimensions() > 2, "The bboxes tensor must be a 2-D float tensor of shape [4, num_boxes]."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(scores->num_dimensions() > 1, "The scores tensor must be a 1-D float tensor of shape [num_boxes]."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_indices->num_dimensions() > 1, "The indices must be 1-D integer tensor of shape [M], where max_output_size <= M"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(bboxes->num_dimensions() > 2, + "The bboxes tensor must be a 2-D float tensor of shape [4, num_boxes]."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(scores->num_dimensions() > 1, + "The scores tensor must be a 1-D float tensor of shape [num_boxes]."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_indices->num_dimensions() > 1, + "The indices must be 1-D integer tensor of shape [M], where max_output_size <= M"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(bboxes, scores); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output_indices->dimension(0) == 0, "Indices tensor must be bigger than 0"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(max_output_size == 0, "Max size cannot be 0"); @@ -55,15 +62,26 @@ Status validate_arguments(const ITensorInfo *bboxes, const ITensorInfo *scores, } // namespace CPPNonMaximumSuppressionKernel::CPPNonMaximumSuppressionKernel() - : _input_bboxes(nullptr), _input_scores(nullptr), _output_indices(nullptr), _max_output_size(0), _score_threshold(0.f), _iou_threshold(0.f), _num_boxes(0) + : _input_bboxes(nullptr), + _input_scores(nullptr), + _output_indices(nullptr), + _max_output_size(0), + _score_threshold(0.f), + _iou_threshold(0.f), + _num_boxes(0) { } -void CPPNonMaximumSuppressionKernel::configure(const ITensor *input_bboxes, const ITensor *input_scores, ITensor *output_indices, - unsigned int max_output_size, const float score_threshold, const float iou_threshold) +void CPPNonMaximumSuppressionKernel::configure(const ITensor *input_bboxes, + const ITensor *input_scores, + ITensor *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold) { ARM_COMPUTE_ERROR_ON_NULLPTR(input_bboxes, input_scores, output_indices); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_bboxes->info(), input_scores->info(), output_indices->info(), max_output_size, score_threshold, iou_threshold)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input_bboxes->info(), input_scores->info(), output_indices->info(), + max_output_size, score_threshold, iou_threshold)); auto_init_if_empty(*output_indices->info(), TensorShape(max_output_size), 1, DataType::U8, QuantizationInfo()); @@ -82,10 +100,15 @@ void CPPNonMaximumSuppressionKernel::configure(const ITensor *input_bboxes, cons ICPPKernel::configure(win); } -Status CPPNonMaximumSuppressionKernel::validate(const ITensorInfo *bboxes, const ITensorInfo *scores, const ITensorInfo *output_indices, - unsigned int max_output_size, const float score_threshold, const float iou_threshold) +Status CPPNonMaximumSuppressionKernel::validate(const ITensorInfo *bboxes, + const ITensorInfo *scores, + const ITensorInfo *output_indices, + unsigned int max_output_size, + const float score_threshold, + const float iou_threshold) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(bboxes, scores, output_indices, max_output_size, score_threshold, iou_threshold)); + ARM_COMPUTE_RETURN_ON_ERROR( + validate_arguments(bboxes, scores, output_indices, max_output_size, score_threshold, iou_threshold)); return Status{}; } @@ -99,10 +122,10 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo // Auxiliary tensors std::vector indices_above_thd; std::vector scores_above_thd; - for(unsigned int i = 0; i < _num_boxes; ++i) + for (unsigned int i = 0; i < _num_boxes; ++i) { const float score_i = *(reinterpret_cast(_input_scores->ptr_to_element(Coordinates(i)))); - if(score_i >= _score_threshold) + if (score_i >= _score_threshold) { scores_above_thd.emplace_back(score_i); indices_above_thd.emplace_back(i); @@ -114,12 +137,9 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo std::vector sorted_indices; sorted_indices.resize(num_above_thd); std::iota(sorted_indices.data(), sorted_indices.data() + num_above_thd, 0); - std::sort(std::begin(sorted_indices), - std::end(sorted_indices), + std::sort(std::begin(sorted_indices), std::end(sorted_indices), [&](unsigned int first, unsigned int second) - { - return scores_above_thd[first] > scores_above_thd[second]; - }); + { return scores_above_thd[first] > scores_above_thd[second]; }); // Number of output is the minimum between max_detection and the scores above the threshold const unsigned int num_output = std::min(_max_output_size, num_above_thd); @@ -127,19 +147,20 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo std::vector visited(num_above_thd, false); // Keep only boxes with small IoU - for(unsigned int i = 0; i < num_above_thd; ++i) + for (unsigned int i = 0; i < num_above_thd; ++i) { // Check if the output is full - if(output_idx >= num_output) + if (output_idx >= num_output) { break; } // Check if it was already visited, if not add it to the output and update the indices counter - if(!visited[sorted_indices[i]]) + if (!visited[sorted_indices[i]]) { - *(reinterpret_cast(_output_indices->ptr_to_element(Coordinates(output_idx)))) = indices_above_thd[sorted_indices[i]]; - visited[sorted_indices[i]] = true; + *(reinterpret_cast(_output_indices->ptr_to_element(Coordinates(output_idx)))) = + indices_above_thd[sorted_indices[i]]; + visited[sorted_indices[i]] = true; ++output_idx; } else @@ -148,28 +169,36 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo } // Once added one element at the output check if the next ones overlap and can be skipped - for(unsigned int j = i + 1; j < num_above_thd; ++j) + for (unsigned int j = i + 1; j < num_above_thd; ++j) { - if(!visited[sorted_indices[j]]) + if (!visited[sorted_indices[j]]) { // Calculate IoU const unsigned int i_index = indices_above_thd[sorted_indices[i]]; const unsigned int j_index = indices_above_thd[sorted_indices[j]]; // Box-corner format: xmin, ymin, xmax, ymax - const auto box_i_xmin = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(0, i_index)))); - const auto box_i_ymin = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(1, i_index)))); - const auto box_i_xmax = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(2, i_index)))); - const auto box_i_ymax = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(3, i_index)))); - - const auto box_j_xmin = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(0, j_index)))); - const auto box_j_ymin = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(1, j_index)))); - const auto box_j_xmax = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(2, j_index)))); - const auto box_j_ymax = *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(3, j_index)))); + const auto box_i_xmin = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(0, i_index)))); + const auto box_i_ymin = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(1, i_index)))); + const auto box_i_xmax = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(2, i_index)))); + const auto box_i_ymax = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(3, i_index)))); + + const auto box_j_xmin = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(0, j_index)))); + const auto box_j_ymin = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(1, j_index)))); + const auto box_j_xmax = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(2, j_index)))); + const auto box_j_ymax = + *(reinterpret_cast(_input_bboxes->ptr_to_element(Coordinates(3, j_index)))); const float area_i = (box_i_xmax - box_i_xmin) * (box_i_ymax - box_i_ymin); const float area_j = (box_j_xmax - box_j_xmin) * (box_j_ymax - box_j_ymin); float overlap; - if(area_i <= 0 || area_j <= 0) + if (area_i <= 0 || area_j <= 0) { overlap = 0.0f; } @@ -179,11 +208,12 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo const auto x_min_intersection = std::max(box_i_xmin, box_j_xmin); const auto y_max_intersection = std::min(box_i_ymax, box_j_ymax); const auto x_max_intersection = std::min(box_i_xmax, box_j_xmax); - const auto area_intersection = std::max(y_max_intersection - y_min_intersection, 0.0f) * std::max(x_max_intersection - x_min_intersection, 0.0f); - overlap = area_intersection / (area_i + area_j - area_intersection); + const auto area_intersection = std::max(y_max_intersection - y_min_intersection, 0.0f) * + std::max(x_max_intersection - x_min_intersection, 0.0f); + overlap = area_intersection / (area_i + area_j - area_intersection); } - if(overlap > _iou_threshold) + if (overlap > _iou_threshold) { visited[sorted_indices[j]] = true; } @@ -192,7 +222,7 @@ void CPPNonMaximumSuppressionKernel::run(const Window &window, const ThreadInfo } // The output could be full but not the output indices tensor // Instead return values not valid we put -1 - for(; output_idx < _max_output_size; ++output_idx) + for (; output_idx < _max_output_size; ++output_idx) { *(reinterpret_cast(_output_indices->ptr_to_element(Coordinates(output_idx)))) = -1; } diff --git a/src/core/CPP/kernels/CPPPermuteKernel.cpp b/src/core/CPP/kernels/CPPPermuteKernel.cpp index 054c7bf05a..e68090d82b 100644 --- a/src/core/CPP/kernels/CPPPermuteKernel.cpp +++ b/src/core/CPP/kernels/CPPPermuteKernel.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" @@ -43,7 +44,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c const TensorShape output_shape = misc::shape_calculator::compute_permutation_output_shape(*input, perm); // Validate configured output - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -65,7 +66,7 @@ void CPPPermuteKernel::run_permute(const Window &window) // Create output window Window window_out(window); const Window::Dimension zero_window = Window::Dimension(0, 0, 0); - for(size_t d = 0; d <= _perm.num_dimensions(); ++d) + for (size_t d = 0; d <= _perm.num_dimensions(); ++d) { window_out.set(d, zero_window); } @@ -74,28 +75,32 @@ void CPPPermuteKernel::run_permute(const Window &window) Iterator in(_input, window); Iterator out(_output, window_out); - if(_input->info()->num_dimensions() <= 3) + if (_input->info()->num_dimensions() <= 3) { - execute_window_loop(window, [&](const Coordinates & id) - { - const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2]; - *(reinterpret_cast(out.ptr() + idx)) = *(reinterpret_cast(in.ptr())); - }, - in, out); + execute_window_loop( + window, + [&](const Coordinates &id) + { + const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2]; + *(reinterpret_cast(out.ptr() + idx)) = *(reinterpret_cast(in.ptr())); + }, + in, out); } - else if(_input->info()->num_dimensions() >= 4) + else if (_input->info()->num_dimensions() >= 4) { - execute_window_loop(window, [&](const Coordinates & id) - { - const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2] + id[3] * perm_strides[3]; - *(reinterpret_cast(out.ptr() + idx)) = *(reinterpret_cast(in.ptr())); - }, - in, out); + execute_window_loop( + window, + [&](const Coordinates &id) + { + const int idx = id[0] * perm_strides[0] + id[1] * perm_strides[1] + id[2] * perm_strides[2] + + id[3] * perm_strides[3]; + *(reinterpret_cast(out.ptr() + idx)) = *(reinterpret_cast(in.ptr())); + }, + in, out); } } -CPPPermuteKernel::CPPPermuteKernel() - : _func(), _input(nullptr), _output(nullptr), _perm() +CPPPermuteKernel::CPPPermuteKernel() : _func(), _input(nullptr), _output(nullptr), _perm() { } @@ -113,7 +118,7 @@ void CPPPermuteKernel::configure(const ITensor *input, ITensor *output, const Pe _output = output; _perm = perm; - switch(input->info()->element_size()) + switch (input->info()->element_size()) { case 1: _func = &CPPPermuteKernel::run_permute; @@ -152,7 +157,7 @@ void CPPPermuteKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - if(_func != nullptr) + if (_func != nullptr) { (this->*_func)(window); } diff --git a/src/core/CPP/kernels/CPPTopKVKernel.cpp b/src/core/CPP/kernels/CPPTopKVKernel.cpp index d2b54e412e..6ffb68e770 100644 --- a/src/core/CPP/kernels/CPPTopKVKernel.cpp +++ b/src/core/CPP/kernels/CPPTopKVKernel.cpp @@ -34,32 +34,34 @@ namespace arm_compute { namespace { -template ::value, int>::type = 0> +template ::value, int>::type = 0> inline bool greater_than(T a, T b) { const T epsilon = std::numeric_limits::epsilon(); return (a - b > epsilon); } -template < typename T, - typename std::enable_if < !utils::traits::is_floating_point::value, int >::type = 0 > +template ::value, int>::type = 0> inline bool greater_than(T a, T b) { return (a > b); } -Status validate_arguments(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k) +Status validate_arguments(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) { ARM_COMPUTE_UNUSED(k); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(predictions, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(predictions, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, + DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(targets, 1, DataType::U32); ARM_COMPUTE_RETURN_ERROR_ON(predictions->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(targets->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(targets->dimension(0) != predictions->dimension(1)); // Validate configured output - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), targets->tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); @@ -72,22 +74,23 @@ Status validate_arguments(const ITensorInfo *predictions, const ITensorInfo *tar template void CPPTopKVKernel::run_topkv() { - for(unsigned int i = 0; i < _batch_size; ++i) + for (unsigned int i = 0; i < _batch_size; ++i) { - const auto target_class_id = *reinterpret_cast(_targets->ptr_to_element(Coordinates{ i })); - const auto predicted_value = *reinterpret_cast(_predictions->ptr_to_element(Coordinates{ target_class_id, i })); + const auto target_class_id = *reinterpret_cast(_targets->ptr_to_element(Coordinates{i})); + const auto predicted_value = + *reinterpret_cast(_predictions->ptr_to_element(Coordinates{target_class_id, i})); // The variable rank indicates how many values there are before the target_class_id unsigned int rank = 0; - for(unsigned int j = 0; (j < _num_classes) && (rank < _k); ++j) + for (unsigned int j = 0; (j < _num_classes) && (rank < _k); ++j) { - const auto current_prediction = *reinterpret_cast(_predictions->ptr_to_element(Coordinates{ j, i })); - if(greater_than(current_prediction, predicted_value)) + const auto current_prediction = *reinterpret_cast(_predictions->ptr_to_element(Coordinates{j, i})); + if (greater_than(current_prediction, predicted_value)) { rank++; } } - *(_output->ptr_to_element(Coordinates{ i })) = static_cast(rank < _k); + *(_output->ptr_to_element(Coordinates{i})) = static_cast(rank < _k); } } @@ -96,7 +99,10 @@ CPPTopKVKernel::CPPTopKVKernel() { } -void CPPTopKVKernel::configure(const ITensor *predictions, const ITensor *targets, ITensor *output, const unsigned int k) +void CPPTopKVKernel::configure(const ITensor *predictions, + const ITensor *targets, + ITensor *output, + const unsigned int k) { ARM_COMPUTE_ERROR_ON_NULLPTR(predictions, targets, output); @@ -115,7 +121,10 @@ void CPPTopKVKernel::configure(const ITensor *predictions, const ITensor *target ICPPKernel::configure(Window()); // Default 1 iteration window } -Status CPPTopKVKernel::validate(const ITensorInfo *predictions, const ITensorInfo *targets, ITensorInfo *output, const unsigned int k) +Status CPPTopKVKernel::validate(const ITensorInfo *predictions, + const ITensorInfo *targets, + ITensorInfo *output, + const unsigned int k) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(predictions, targets, output, k)); return Status{}; @@ -129,7 +138,7 @@ bool CPPTopKVKernel::is_parallelisable() const void CPPTopKVKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(window, info); - switch(_predictions->info()->data_type()) + switch (_predictions->info()->data_type()) { case DataType::F32: run_topkv(); diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp index 7ef83fb2c4..b1efe32446 100644 --- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp +++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp @@ -24,6 +24,7 @@ #include "arm_compute/core/CPP/kernels/CPPUpsampleKernel.h" #include "arm_compute/core/Helpers.h" + #include "src/core/helpers/WindowHelpers.h" #include @@ -31,8 +32,7 @@ namespace arm_compute { -CPPUpsampleKernel::CPPUpsampleKernel() - : _input(nullptr), _output(nullptr), _info() +CPPUpsampleKernel::CPPUpsampleKernel() : _input(nullptr), _output(nullptr), _info() { } @@ -82,7 +82,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info) const size_t element_size = _input->info()->element_size(); // The fill value is normally 0, but for quantized types '0' corresponds to the offset - switch(_output->info()->data_type()) + switch (_output->info()->data_type()) { case DataType::QASYMM8: { @@ -102,7 +102,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info) // Create window Window window_out(window); - if(data_layout == DataLayout::NCHW) + if (data_layout == DataLayout::NCHW) { window_out.set(Window::DimX, Window::Dimension(start_width, end_width, stride_width)); window_out.set(Window::DimY, Window::Dimension(start_height, end_height, stride_height)); @@ -117,10 +117,7 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info) Iterator in(_input, window); Iterator out(_output, window_out); - execute_window_loop(window, [&](const Coordinates &) - { - memcpy(out.ptr(), in.ptr(), element_size); - }, - in, out); + execute_window_loop( + window, [&](const Coordinates &) { memcpy(out.ptr(), in.ptr(), element_size); }, in, out); } -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute -- cgit v1.2.1