From afd38f0c617d6f89b2b4532c6c44f116617e2b6f Mon Sep 17 00:00:00 2001 From: Felix Thomasmathibalan Date: Wed, 27 Sep 2023 17:46:17 +0100 Subject: Apply clang-format on repository Code is formatted as per a revised clang format configuration file(not part of this delivery). Version 14.0.6 is used. Exclusion List: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...) And the following directories - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir --- .../NEON/functions/NEGenerateProposalsLayer.cpp | 187 +++++++++++++-------- 1 file changed, 115 insertions(+), 72 deletions(-) (limited to 'src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp') diff --git a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp index 1c0e736766..1022b4153e 100644 --- a/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp +++ b/src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp @@ -25,11 +25,12 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" + #include "src/common/utils/Log.h" +#include "src/core/helpers/AutoConfiguration.h" #include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" #include "src/core/NEON/kernels/NEPadLayerKernel.h" -#include "src/core/helpers/AutoConfiguration.h" namespace arm_compute { @@ -68,42 +69,55 @@ NEGenerateProposalsLayer::NEGenerateProposalsLayer(std::shared_ptrinfo(), deltas->info(), anchors->info(), proposals->info(), scores_out->info(), num_valid_proposals->info(), info)); + ARM_COMPUTE_ERROR_THROW_ON(NEGenerateProposalsLayer::validate(scores->info(), deltas->info(), anchors->info(), + proposals->info(), scores_out->info(), + num_valid_proposals->info(), info)); ARM_COMPUTE_LOG_PARAMS(scores, deltas, anchors, proposals, scores_out, num_valid_proposals, info); _is_nhwc = scores->info()->data_layout() == DataLayout::NHWC; const DataType scores_data_type = scores->info()->data_type(); _is_qasymm8 = scores_data_type == DataType::QASYMM8; - const int num_anchors = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::CHANNEL)); - const int feat_width = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::WIDTH)); - const int feat_height = scores->info()->dimension(get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::HEIGHT)); - const int total_num_anchors = num_anchors * feat_width * feat_height; - const int pre_nms_topN = info.pre_nms_topN(); - const int post_nms_topN = info.post_nms_topN(); - const size_t values_per_roi = info.values_per_roi(); + const int num_anchors = scores->info()->dimension( + get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::CHANNEL)); + const int feat_width = scores->info()->dimension( + get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::WIDTH)); + const int feat_height = scores->info()->dimension( + get_data_layout_dimension_index(scores->info()->data_layout(), DataLayoutDimension::HEIGHT)); + const int total_num_anchors = num_anchors * feat_width * feat_height; + const int pre_nms_topN = info.pre_nms_topN(); + const int post_nms_topN = info.post_nms_topN(); + const size_t values_per_roi = info.values_per_roi(); const QuantizationInfo scores_qinfo = scores->info()->quantization_info(); const DataType rois_data_type = (_is_qasymm8) ? DataType::QASYMM16 : scores_data_type; - const QuantizationInfo rois_qinfo = (_is_qasymm8) ? QuantizationInfo(0.125f, 0) : scores->info()->quantization_info(); + const QuantizationInfo rois_qinfo = + (_is_qasymm8) ? QuantizationInfo(0.125f, 0) : scores->info()->quantization_info(); // Compute all the anchors _memory_group.manage(&_all_anchors); _compute_anchors = std::make_unique(); - _compute_anchors->configure(anchors, &_all_anchors, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())); + _compute_anchors->configure(anchors, &_all_anchors, + ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale())); const TensorShape flatten_shape_deltas(values_per_roi, total_num_anchors); - _deltas_flattened.allocator()->init(TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info())); + _deltas_flattened.allocator()->init( + TensorInfo(flatten_shape_deltas, 1, scores_data_type, deltas->info()->quantization_info())); // Permute and reshape deltas _memory_group.manage(&_deltas_flattened); - if(!_is_nhwc) + if (!_is_nhwc) { _memory_group.manage(&_deltas_permuted); - _permute_deltas.configure(deltas, &_deltas_permuted, PermutationVector{ 2, 0, 1 }); + _permute_deltas.configure(deltas, &_deltas_permuted, PermutationVector{2, 0, 1}); _flatten_deltas.configure(&_deltas_permuted, &_deltas_flattened); _deltas_permuted.allocator()->allocate(); } @@ -117,10 +131,10 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d // Permute and reshape scores _memory_group.manage(&_scores_flattened); - if(!_is_nhwc) + if (!_is_nhwc) { _memory_group.manage(&_scores_permuted); - _permute_scores.configure(scores, &_scores_permuted, PermutationVector{ 2, 0, 1 }); + _permute_scores.configure(scores, &_scores_permuted, PermutationVector{2, 0, 1}); _flatten_scores.configure(&_scores_permuted, &_scores_flattened); _scores_permuted.allocator()->allocate(); } @@ -131,7 +145,7 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d Tensor *anchors_to_use = &_all_anchors; Tensor *deltas_to_use = &_deltas_flattened; - if(_is_qasymm8) + if (_is_qasymm8) { _all_anchors_f32.allocator()->init(TensorInfo(_all_anchors.info()->tensor_shape(), 1, DataType::F32)); _deltas_flattened_f32.allocator()->init(TensorInfo(_deltas_flattened.info()->tensor_shape(), 1, DataType::F32)); @@ -154,11 +168,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d anchors_to_use->allocator()->allocate(); _all_proposals_to_use = &_all_proposals; - if(_is_qasymm8) + if (_is_qasymm8) { _memory_group.manage(&_all_proposals_quantized); // Requantize all_proposals to QASYMM16 with 0.125 scale and 0 offset - _all_proposals_quantized.allocator()->init(TensorInfo(_all_proposals.info()->tensor_shape(), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0))); + _all_proposals_quantized.allocator()->init( + TensorInfo(_all_proposals.info()->tensor_shape(), 1, DataType::QASYMM16, QuantizationInfo(0.125f, 0))); _quantize_all_proposals.configure(&_all_proposals, &_all_proposals_quantized); _all_proposals.allocator()->allocate(); _all_proposals_to_use = &_all_proposals_quantized; @@ -174,7 +189,8 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d // Note that NMS needs outputs preinitialized. auto_init_if_empty(*scores_out->info(), TensorShape(scores_nms_size), 1, scores_data_type, scores_qinfo); - auto_init_if_empty(*_proposals_4_roi_values.info(), TensorShape(values_per_roi, scores_nms_size), 1, rois_data_type, rois_qinfo); + auto_init_if_empty(*_proposals_4_roi_values.info(), TensorShape(values_per_roi, scores_nms_size), 1, rois_data_type, + rois_qinfo); auto_init_if_empty(*num_valid_proposals->info(), TensorShape(1), 1, DataType::U32); // Initialize temporaries (unused) outputs @@ -187,17 +203,12 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d _memory_group.manage(&_proposals_4_roi_values); - const BoxNMSLimitInfo box_nms_info(0.0f, info.nms_thres(), scores_nms_size, false, NMSType::LINEAR, 0.5f, 0.001f, true, min_size_scaled, info.im_width(), info.im_height()); - _cpp_nms.configure(&_scores_flattened /*scores_in*/, - _all_proposals_to_use /*boxes_in,*/, - nullptr /* batch_splits_in*/, - scores_out /* scores_out*/, - &_proposals_4_roi_values /*boxes_out*/, - &_classes_nms_unused /*classes*/, - nullptr /*batch_splits_out*/, - &_keeps_nms_unused /*keeps*/, - num_valid_proposals /* keeps_size*/, - box_nms_info); + const BoxNMSLimitInfo box_nms_info(0.0f, info.nms_thres(), scores_nms_size, false, NMSType::LINEAR, 0.5f, 0.001f, + true, min_size_scaled, info.im_width(), info.im_height()); + _cpp_nms.configure(&_scores_flattened /*scores_in*/, _all_proposals_to_use /*boxes_in,*/, + nullptr /* batch_splits_in*/, scores_out /* scores_out*/, &_proposals_4_roi_values /*boxes_out*/, + &_classes_nms_unused /*classes*/, nullptr /*batch_splits_out*/, &_keeps_nms_unused /*keeps*/, + num_valid_proposals /* keeps_size*/, box_nms_info); _keeps_nms_unused.allocator()->allocate(); _classes_nms_unused.allocator()->allocate(); @@ -205,12 +216,17 @@ void NEGenerateProposalsLayer::configure(const ITensor *scores, const ITensor *d _scores_flattened.allocator()->allocate(); // Add the first column that represents the batch id. This will be all zeros, as we don't support multiple images - _pad.configure(&_proposals_4_roi_values, proposals, PaddingList{ { 1, 0 } }); + _pad.configure(&_proposals_4_roi_values, proposals, PaddingList{{1, 0}}); _proposals_4_roi_values.allocator()->allocate(); } -Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITensorInfo *deltas, const ITensorInfo *anchors, const ITensorInfo *proposals, const ITensorInfo *scores_out, - const ITensorInfo *num_valid_proposals, const GenerateProposalsInfo &info) +Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, + const ITensorInfo *deltas, + const ITensorInfo *anchors, + const ITensorInfo *proposals, + const ITensorInfo *scores_out, + const ITensorInfo *num_valid_proposals, + const GenerateProposalsInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(scores, deltas, anchors, proposals, scores_out, num_valid_proposals); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(scores, 1, DataType::QASYMM8, DataType::F16, DataType::F32); @@ -218,9 +234,12 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(scores, deltas); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(scores, deltas); - const int num_anchors = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::CHANNEL)); - const int feat_width = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::WIDTH)); - const int feat_height = scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::HEIGHT)); + const int num_anchors = + scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::CHANNEL)); + const int feat_width = + scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::WIDTH)); + const int feat_height = + scores->dimension(get_data_layout_dimension_index(scores->data_layout(), DataLayoutDimension::HEIGHT)); const int num_images = scores->dimension(3); const int total_num_anchors = num_anchors * feat_width * feat_height; const int values_per_roi = info.values_per_roi(); @@ -229,76 +248,100 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens ARM_COMPUTE_RETURN_ERROR_ON(num_images > 1); - if(is_qasymm8) + if (is_qasymm8) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(anchors, 1, DataType::QSYMM16); const UniformQuantizationInfo anchors_qinfo = anchors->quantization_info().uniform(); ARM_COMPUTE_RETURN_ERROR_ON(anchors_qinfo.scale != 0.125f); } - TensorInfo all_anchors_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate(anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); - - TensorInfo deltas_permuted_info = deltas->clone()->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)).set_is_resizable(true); - TensorInfo scores_permuted_info = scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true); - if(scores->data_layout() == DataLayout::NHWC) + TensorInfo all_anchors_info( + anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); + ARM_COMPUTE_RETURN_ON_ERROR(NEComputeAllAnchorsKernel::validate( + anchors, &all_anchors_info, ComputeAnchorsInfo(feat_width, feat_height, info.spatial_scale()))); + + TensorInfo deltas_permuted_info = + deltas->clone() + ->set_tensor_shape(TensorShape(values_per_roi * num_anchors, feat_width, feat_height)) + .set_is_resizable(true); + TensorInfo scores_permuted_info = + scores->clone()->set_tensor_shape(TensorShape(num_anchors, feat_width, feat_height)).set_is_resizable(true); + if (scores->data_layout() == DataLayout::NHWC) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(deltas, &deltas_permuted_info); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(scores, &scores_permuted_info); } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(deltas, &deltas_permuted_info, PermutationVector{ 2, 0, 1 })); - ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(scores, &scores_permuted_info, PermutationVector{ 2, 0, 1 })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(deltas, &deltas_permuted_info, PermutationVector{2, 0, 1})); + ARM_COMPUTE_RETURN_ON_ERROR(NEPermute::validate(scores, &scores_permuted_info, PermutationVector{2, 0, 1})); } - TensorInfo deltas_flattened_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); + TensorInfo deltas_flattened_info( + deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&deltas_permuted_info, &deltas_flattened_info)); - TensorInfo scores_flattened_info(scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true)); - TensorInfo proposals_4_roi_values(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); + TensorInfo scores_flattened_info( + scores->clone()->set_tensor_shape(TensorShape(1, total_num_anchors)).set_is_resizable(true)); + TensorInfo proposals_4_roi_values( + deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); ARM_COMPUTE_RETURN_ON_ERROR(NEReshapeLayer::validate(&scores_permuted_info, &scores_flattened_info)); TensorInfo *proposals_4_roi_values_to_use = &proposals_4_roi_values; - TensorInfo proposals_4_roi_values_quantized(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); - proposals_4_roi_values_quantized.set_data_type(DataType::QASYMM16).set_quantization_info(QuantizationInfo(0.125f, 0)); - if(is_qasymm8) + TensorInfo proposals_4_roi_values_quantized( + deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true)); + proposals_4_roi_values_quantized.set_data_type(DataType::QASYMM16) + .set_quantization_info(QuantizationInfo(0.125f, 0)); + if (is_qasymm8) { - TensorInfo all_anchors_f32_info(anchors->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); + TensorInfo all_anchors_f32_info(anchors->clone() + ->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)) + .set_is_resizable(true) + .set_data_type(DataType::F32)); ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&all_anchors_info, &all_anchors_f32_info)); - TensorInfo deltas_flattened_f32_info(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info)); - - TensorInfo proposals_4_roi_values_f32(deltas->clone()->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)).set_is_resizable(true).set_data_type(DataType::F32)); - ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info, - BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); - - ARM_COMPUTE_RETURN_ON_ERROR(NEQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized)); + TensorInfo deltas_flattened_f32_info(deltas->clone() + ->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)) + .set_is_resizable(true) + .set_data_type(DataType::F32)); + ARM_COMPUTE_RETURN_ON_ERROR( + NEDequantizationLayer::validate(&deltas_flattened_info, &deltas_flattened_f32_info)); + + TensorInfo proposals_4_roi_values_f32(deltas->clone() + ->set_tensor_shape(TensorShape(values_per_roi, total_num_anchors)) + .set_is_resizable(true) + .set_data_type(DataType::F32)); + ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate( + &all_anchors_f32_info, &proposals_4_roi_values_f32, &deltas_flattened_f32_info, + BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); + + ARM_COMPUTE_RETURN_ON_ERROR( + NEQuantizationLayer::validate(&proposals_4_roi_values_f32, &proposals_4_roi_values_quantized)); proposals_4_roi_values_to_use = &proposals_4_roi_values_quantized; } else { - ARM_COMPUTE_RETURN_ON_ERROR(NEBoundingBoxTransform::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, - BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); + ARM_COMPUTE_RETURN_ON_ERROR( + NEBoundingBoxTransform::validate(&all_anchors_info, &proposals_4_roi_values, &deltas_flattened_info, + BoundingBoxTransformInfo(info.im_width(), info.im_height(), 1.f))); } - ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(proposals_4_roi_values_to_use, proposals, PaddingList{ { 1, 0 } })); + ARM_COMPUTE_RETURN_ON_ERROR(NEPadLayer::validate(proposals_4_roi_values_to_use, proposals, PaddingList{{1, 0}})); - if(num_valid_proposals->total_size() > 0) + if (num_valid_proposals->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(num_valid_proposals->dimension(0) > 1); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(num_valid_proposals, 1, DataType::U32); } - if(proposals->total_size() > 0) + if (proposals->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON(proposals->num_dimensions() > 2); ARM_COMPUTE_RETURN_ERROR_ON(proposals->dimension(0) != size_t(values_per_roi) + 1); ARM_COMPUTE_RETURN_ERROR_ON(proposals->dimension(1) != size_t(total_num_anchors)); - if(is_qasymm8) + if (is_qasymm8) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(proposals, 1, DataType::QASYMM16); const UniformQuantizationInfo proposals_qinfo = proposals->quantization_info().uniform(); @@ -311,7 +354,7 @@ Status NEGenerateProposalsLayer::validate(const ITensorInfo *scores, const ITens } } - if(scores_out->total_size() > 0) + if (scores_out->total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON(scores_out->num_dimensions() > 1); ARM_COMPUTE_RETURN_ERROR_ON(scores_out->dimension(0) != size_t(total_num_anchors)); @@ -330,7 +373,7 @@ void NEGenerateProposalsLayer::run() NEScheduler::get().schedule(_compute_anchors.get(), Window::DimY); // Transpose and reshape the inputs - if(!_is_nhwc) + if (!_is_nhwc) { _permute_deltas.run(); _permute_scores.run(); @@ -339,7 +382,7 @@ void NEGenerateProposalsLayer::run() _flatten_deltas.run(); _flatten_scores.run(); - if(_is_qasymm8) + if (_is_qasymm8) { _dequantize_anchors.run(); _dequantize_deltas.run(); @@ -348,7 +391,7 @@ void NEGenerateProposalsLayer::run() // Build the boxes _bounding_box.run(); - if(_is_qasymm8) + if (_is_qasymm8) { _quantize_all_proposals.run(); } -- cgit v1.2.1